Прогнозирование оттока

Финальное решение и экономическая модель

Глазунов А.В.

Задачей данного ноутбука является построение наилучшей модели посредством экспериментов с разными моделями бустинга, оверсэмплингом, дополнительными признаками, а также дальнейшее построение прототипа экономической модели для оценки применимости машинного обучения для бизнеса.

Подключение диска и загрузка библиотек

In [1]:
# Mount Google Drive at /content/gdrive inside the Colab runtime
# (force_remount=True is passed so an existing mount is redone).
from google.colab import drive
drive.mount('/content/gdrive',force_remount=True)
Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive
In [2]:
cd /content/gdrive/'My Drive'/Colab_Notebooks/CHURN_prediction_competition
/content/gdrive/My Drive/Colab_Notebooks/CHURN_prediction_competition
In [3]:
pip install category_encoders
Collecting category_encoders
  Downloading https://files.pythonhosted.org/packages/44/57/fcef41c248701ee62e8325026b90c432adea35555cbc870aff9cfba23727/category_encoders-2.2.2-py2.py3-none-any.whl (80kB)
     |████████████████████████████████| 81kB 4.1MB/s 
Requirement already satisfied: statsmodels>=0.9.0 in /usr/local/lib/python3.6/dist-packages (from category_encoders) (0.10.2)
Requirement already satisfied: scikit-learn>=0.20.0 in /usr/local/lib/python3.6/dist-packages (from category_encoders) (0.22.2.post1)
Requirement already satisfied: scipy>=1.0.0 in /usr/local/lib/python3.6/dist-packages (from category_encoders) (1.4.1)
Requirement already satisfied: patsy>=0.5.1 in /usr/local/lib/python3.6/dist-packages (from category_encoders) (0.5.1)
Requirement already satisfied: pandas>=0.21.1 in /usr/local/lib/python3.6/dist-packages (from category_encoders) (1.0.5)
Requirement already satisfied: numpy>=1.14.0 in /usr/local/lib/python3.6/dist-packages (from category_encoders) (1.18.5)
Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.6/dist-packages (from scikit-learn>=0.20.0->category_encoders) (0.15.1)
Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from patsy>=0.5.1->category_encoders) (1.12.0)
Requirement already satisfied: python-dateutil>=2.6.1 in /usr/local/lib/python3.6/dist-packages (from pandas>=0.21.1->category_encoders) (2.8.1)
Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas>=0.21.1->category_encoders) (2018.9)
Installing collected packages: category-encoders
Successfully installed category-encoders-2.2.2
In [4]:
pip install catboost
Collecting catboost
  Downloading https://files.pythonhosted.org/packages/b2/aa/e61819d04ef2bbee778bf4b3a748db1f3ad23512377e43ecfdc3211437a0/catboost-0.23.2-cp36-none-manylinux1_x86_64.whl (64.8MB)
     |████████████████████████████████| 64.8MB 55kB/s 
Requirement already satisfied: numpy>=1.16.0 in /usr/local/lib/python3.6/dist-packages (from catboost) (1.18.5)
Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from catboost) (1.12.0)
Requirement already satisfied: graphviz in /usr/local/lib/python3.6/dist-packages (from catboost) (0.10.1)
Requirement already satisfied: pandas>=0.24.0 in /usr/local/lib/python3.6/dist-packages (from catboost) (1.0.5)
Requirement already satisfied: matplotlib in /usr/local/lib/python3.6/dist-packages (from catboost) (3.2.2)
Requirement already satisfied: plotly in /usr/local/lib/python3.6/dist-packages (from catboost) (4.4.1)
Requirement already satisfied: scipy in /usr/local/lib/python3.6/dist-packages (from catboost) (1.4.1)
Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas>=0.24.0->catboost) (2018.9)
Requirement already satisfied: python-dateutil>=2.6.1 in /usr/local/lib/python3.6/dist-packages (from pandas>=0.24.0->catboost) (2.8.1)
Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.6/dist-packages (from matplotlib->catboost) (0.10.0)
Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->catboost) (1.2.0)
Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->catboost) (2.4.7)
Requirement already satisfied: retrying>=1.3.3 in /usr/local/lib/python3.6/dist-packages (from plotly->catboost) (1.3.3)
Installing collected packages: catboost
Successfully installed catboost-0.23.2
In [6]:
from sklearn.base import BaseEstimator, TransformerMixin


import warnings
warnings.filterwarnings('ignore')
import seaborn as sns
from matplotlib import pyplot as plt
sns.set_style("darkgrid")
import pandas as pd
import numpy as np
from scipy import stats
import itertools
import random
from category_encoders import LeaveOneOutEncoder,TargetEncoder, BinaryEncoder,SumEncoder,BackwardDifferenceEncoder
from sklearn import model_selection, metrics, pipeline, preprocessing,impute
from sklearn.model_selection import train_test_split,StratifiedKFold, cross_val_score,GridSearchCV
from sklearn.feature_selection import SelectFromModel

from imblearn import over_sampling
import gc
from tqdm import tqdm_notebook

from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostClassifier,Pool
from sklearn.linear_model import SGDClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score,roc_auc_score,f1_score,classification_report,roc_curve, confusion_matrix,average_precision_score,precision_recall_curve,auc,precision_score,recall_score

Загрузка данных, инициализация функций, эксперименты с моделью

In [7]:
# Load the training features; the label file is read without a header row,
# so its single column is explicitly named 'target'.
features = pd.read_csv('orange_small_churn_data.train')
labels = pd.read_csv('orange_small_churn_labels.train',header= None,names=['target'])
# Hold out 30% of the rows for validation: shuffled, seeded (random_state=42)
# and stratified by the label.
feats_train,feats_val,labels_train,labels_val = train_test_split(features,labels, test_size = 0.3,\
                                                                   shuffle=True,random_state=42,\
                                                                   stratify = labels)
# Preview the first rows of the training part.
feats_train.head()
Out[7]:
Var1 Var2 Var3 Var4 Var5 Var6 Var7 Var8 Var9 Var10 Var11 Var12 Var13 Var14 Var15 Var16 Var17 Var18 Var19 Var20 Var21 Var22 Var23 Var24 Var25 Var26 Var27 Var28 Var29 Var30 Var31 Var32 Var33 Var34 Var35 Var36 Var37 Var38 Var39 Var40 ... Var191 Var192 Var193 Var194 Var195 Var196 Var197 Var198 Var199 Var200 Var201 Var202 Var203 Var204 Var205 Var206 Var207 Var208 Var209 Var210 Var211 Var212 Var213 Var214 Var215 Var216 Var217 Var218 Var219 Var220 Var221 Var222 Var223 Var224 Var225 Var226 Var227 Var228 Var229 Var230
6892 NaN NaN NaN 0.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.0 0.0 0.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.0 NaN NaN NaN ... NaN DHeTmBftjz RO12 NaN taul 1K8T 7gSz 8ij6Lg8 LJF4fPp NaN NaN Bcur 9_Y1 Z5OU VpdQ NaN me75fM6ugJ kIsH NaN uKAI Mtgm NhsEn4L NaN NaN NaN 7WwCtIM 1GbF cJvF AU8pNoi 7OmVzos oslk 76DJixu LM8l689qOp NaN NaN Qu4f RAYp F2FyR07IdsN7I NaN NaN
34821 NaN NaN NaN NaN 0.0 NaN NaN NaN NaN 0.0 NaN NaN NaN NaN NaN 3.36 NaN NaN NaN NaN NaN NaN 0.0 NaN NaN 0.0 0.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN 2jigUH7ejg RO12 NaN taul 1K8T dm89 LG0vbUP n1zVHpT8NN NaN NaN 5FzM 9_Y1 DmlN 09_Q NaN me75fM6ugJ kIsH NaN uKAI L84s NhsEn4L NaN NaN NaN mAja5EA FJ56cYO cJvF NaN sE0uLpj oslk G9maF5M NaN NaN NaN w_Ub RAYp F2FyR07IdsN7I NaN NaN
34190 NaN NaN NaN NaN NaN 98.0 0.0 NaN NaN NaN NaN NaN 0.0 NaN NaN NaN NaN NaN NaN NaN 152.0 190.0 NaN 0.0 64.0 NaN NaN 86.96 NaN NaN NaN NaN NaN NaN 5.0 NaN NaN 4107204.0 NaN NaN ... NaN zcRZptzip9 RO12 NaN taul 1K8T USOt pro8v8X CsjH_hi NaN NaN rUBc 9_Y1 t_4G VpdQ IYzP me75fM6ugJ sBgB NaN uKAI L84s NhsEn4L NaN NaN NaN mAjDcoz xYrN cJvF FzaX meWVy8V oslk DQ3u3MC LM8l689qOp NaN NaN Qu4f RAYp F2FyR07IdsN7I NaN NaN
24541 NaN NaN NaN NaN NaN 938.0 7.0 NaN NaN NaN NaN NaN 520.0 NaN NaN NaN NaN NaN NaN NaN 148.0 185.0 NaN 2.0 96.0 NaN NaN 186.64 NaN NaN NaN NaN NaN NaN 0.0 NaN NaN 749586.0 NaN NaN ... NaN mzKvyx8zhV 2Knk1KF NaN taul 1K8T vSNn fhk21Ss Hz673939hSRjL 0eHFhrP NaN W9XQ 9_Y1 QMes VpdQ wMei 7M47J5GA0pTYIFxg5uy kIsH NaN uKAI L84s Ie_5MZs NaN Go_XylT NaN XTbjhEX F6F0 cJvF FzaX 4UxGlow zCkv catzS2D jySVZNlOJy NaN ELof WqMG ZI9m TCU50_Yjmm6GIBZ0lL_ mj86 NaN
31483 NaN NaN NaN NaN NaN 602.0 7.0 NaN NaN NaN NaN NaN 88.0 NaN NaN NaN NaN NaN NaN NaN 0.0 0.0 NaN NaN 0.0 NaN NaN 166.56 NaN NaN NaN NaN NaN NaN 0.0 NaN NaN 0.0 NaN NaN ... NaN 639qrQK2Mx RO12 NaN taul 1K8T AHgj creg0bq 5q1hF23 NaN NaN 6Yf9 9_Y1 15m3 sJzTlal IYzP me75fM6ugJ kIsH NaN uKAI L84s NhsEn4L NaN NaN NaN mAjbk_S oLcf cJvF FzaX VgKv48t oslk nRgz4Af LM8l689qOp NaN NaN FSa2 RAYp F2FyR07IdsN7I NaN NaN

5 rows × 230 columns

In [8]:
# Load the test set, using its 'ID' column as the index, and preview it.
test_data = pd.read_csv('orange_small_churn_test_data.csv', index_col='ID')
test_data.head()
Out[8]:
Var1 Var2 Var3 Var4 Var5 Var6 Var7 Var8 Var9 Var10 Var11 Var12 Var13 Var14 Var15 Var16 Var17 Var18 Var19 Var20 Var21 Var22 Var23 Var24 Var25 Var26 Var27 Var28 Var29 Var30 Var31 Var32 Var33 Var34 Var35 Var36 Var37 Var38 Var39 Var40 ... Var191 Var192 Var193 Var194 Var195 Var196 Var197 Var198 Var199 Var200 Var201 Var202 Var203 Var204 Var205 Var206 Var207 Var208 Var209 Var210 Var211 Var212 Var213 Var214 Var215 Var216 Var217 Var218 Var219 Var220 Var221 Var222 Var223 Var224 Var225 Var226 Var227 Var228 Var229 Var230
ID
0 NaN NaN NaN NaN NaN 1225.0 7.0 NaN NaN NaN NaN NaN 2352.0 NaN NaN NaN NaN NaN NaN NaN 180.0 225.0 NaN 0.0 56.0 NaN NaN 388.08 NaN NaN NaN NaN NaN NaN 0.0 NaN NaN 6726960.0 NaN NaN ... NaN P1WvyxLp3Z 2Knk1KF NaN taul 1K8T 0Xwj PHNvXy8 xUOdRmdN20 IBm9AvG NaN skcI HLqf vzJD NaN lVqb NKv3VA1BpP sBgB NaN uKAI L84s Ie_5MZs NaN xRz9wph NaN CWNY2py jgOV UYBR FzaX UF16siJ zCkv APgdzOv jySVZNlOJy NaN ELof xb3V 6fzt Zy3gnGM NaN NaN
1 NaN NaN NaN NaN NaN 896.0 14.0 NaN NaN NaN NaN NaN 1560.0 NaN NaN NaN NaN NaN NaN NaN 112.0 140.0 NaN 0.0 96.0 NaN NaN 133.12 NaN NaN NaN NaN NaN NaN 0.0 NaN NaN 0.0 NaN NaN ... NaN bZkvyxLkBI RO12 NaN taul 1K8T 0Xwj 6KF0k8W V4E_TU9097 NaN NaN CZsd F3hy vzJD 09_Q IYzP me75fM6ugJ kIsH NaN uKAI L84s NhsEn4L NaN NaN NaN TDcECyH PDRj cJvF FzaX ot6oLzk oslk IIvC99a LM8l689qOp NaN NaN xb3V RAYp F2FyR07IdsN7I NaN NaN
2 NaN NaN NaN NaN NaN 791.0 7.0 NaN NaN NaN NaN NaN 2824.0 NaN NaN NaN NaN NaN NaN NaN 172.0 215.0 NaN 4.0 16.0 NaN NaN 324.48 NaN NaN NaN NaN NaN NaN 0.0 NaN NaN 2988486.0 NaN NaN ... NaN 75lTmBtFkL RO12 SEuy taul 1K8T AnrR ckoNVBU _jTP8ioIlJ JnrRQD4 smXZ PwdO 9_Y1 C6Eu VpdQ sYC_ me75fM6ugJ kIsH NaN uKAI Mtgm NhsEn4L NaN Iy8LM_S NaN kq0aHkC laMb UYBR FzaX 6VLNqhB oslk 6YSocsg LM8l689qOp NaN kG3k rgKb RAYp F2FyR07IdsN7I mj86 NaN
3 NaN NaN NaN NaN NaN 2296.0 7.0 NaN NaN NaN NaN NaN 3732.0 NaN NaN NaN NaN NaN NaN NaN 480.0 600.0 NaN 10.0 104.0 NaN NaN 286.96 NaN NaN NaN NaN NaN NaN 0.0 NaN NaN 6637740.0 NaN NaN ... NaN YddTmBtueT RO12 SEuy taul 1K8T 487l 77f44U8 II0S8f9 a1lFLoc smXZ SOkz 9_Y1 C6Eu VpdQ IYzP me75fM6ugJ kIsH NaN uKAI L84s NhsEn4L NaN GccbB3h NaN mAjbk_S qLRt UYBR FzaX tzp8jNM oslk 5nQ7A2G jySVZNlOJy NaN kG3k rgKb RAYp F2FyR07IdsN7I am7c NaN
4 8.0 NaN NaN NaN NaN NaN NaN NaN 28.0 NaN NaN 0.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.0 10.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN mCGq9ayE15 RO12 NaN taul 1K8T lK27 esxkA1P 767sa0XN9l NaN NaN kVZt 9_Y1 vm5R 09_Q NaN me75fM6ugJ kIsH NaN uKAI Mtgm NhsEn4L NaN NaN eGzu 7Wwvby0 XqfQ UYBR FzaX EPImS85 oslk MI8s5nE LM8l689qOp NaN NaN 7P5s RAYp F2FyR07IdsN7I NaN NaN

5 rows × 230 columns

In [9]:
def null_map(data):
    """Draw a heatmap of the missing-value mask of ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose ``isnull()`` mask is plotted (rows x columns).

    The figure is 20x14 inches, drawn inside matplotlib's ``xkcd`` style
    context with a two-colour palette. Nothing is returned.
    """
    missing_mask = data.isnull()
    with plt.xkcd():
        plt.figure(figsize=(20, 14))
        colors = ['#000099', '#ffff00']
        sns.heatmap(missing_mask, cmap=sns.color_palette(colors))


# Visualise the missing values of the training split.
null_map(feats_train)
In [9]:
def feat_classif_clean_nan(data,prop_nan=0.3,len_uniq_num_min=20,len_uniq_min=2,
                           extra_num_columns=('Var73',)):
    """Pre-select features by NaN share and split them into numeric / categorical.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw feature frame.
    prop_nan : float
        A column is kept only when its share of non-missing values is
        strictly greater than ``1 - prop_nan``.
    len_uniq_num_min : int
        A float column with at most this many distinct non-missing values
        is treated as categorical rather than numeric.
    len_uniq_min : int
        Minimum number of distinct non-missing values a categorical column
        must have to be kept.
    extra_num_columns : iterable of str
        Column names that are always appended to the numeric list without
        passing the NaN filter. The default reproduces the previously
        hard-coded ``'Var73'``. A name is appended only when it exists in
        ``data`` and is not already in the numeric list, so the result never
        contains ghost or duplicated names.

    Returns
    -------
    (num_columns, cat_columns) : tuple of list of str
        Numeric column names, and categorical column names (object columns
        first, then low-cardinality float columns).
    """
    n_rows = data.shape[0]

    def _filled_enough(col):
        # Share of non-missing values must exceed 1 - prop_nan; an empty
        # frame keeps nothing instead of raising ZeroDivisionError.
        return n_rows > 0 and data[col].count() / n_rows > 1 - prop_nan

    # Float columns: numeric when they have many distinct values,
    # categorical when they have few (but at least len_uniq_min).
    cols_float_num = []
    cols_float_cat = []
    for col in data.select_dtypes(include=['float64']).columns:
        if not _filled_enough(col):
            continue
        n_unique = data[col].nunique()  # NaN is not counted
        if n_unique > len_uniq_num_min:
            cols_float_num.append(col)
        elif n_unique >= len_uniq_min:
            cols_float_cat.append(col)

    # Non-numeric (object) columns are always categorical.
    cols_cat = [
        col for col in data.select_dtypes(include=['object']).columns
        if _filled_enough(col) and data[col].nunique() >= len_uniq_min
    ]

    forced_num = [col for col in extra_num_columns
                  if col in data.columns and col not in cols_float_num]

    num_columns = cols_float_num + forced_num  # numeric features
    cat_columns = cols_cat + cols_float_cat  # categorical features

    return num_columns,cat_columns
In [10]:
def fe_cat_clean_too_high_cardinality(data,cat_columns,max_prop_unique=1):
    """Drop categorical columns that have too many distinct levels.

    For every column in ``cat_columns`` the ratio
    ``distinct non-missing values / non-missing values`` is computed; the
    column is kept only when that ratio is strictly below
    ``max_prop_unique``.

    Returns the list of kept column names, in the order of ``cat_columns``.
    """
    subset = data[cat_columns]

    def _unique_share(name):
        # value_counts() ignores NaN: its length is the number of levels,
        # its sum the number of filled cells.
        counts = subset[name].value_counts()
        return counts.shape[0] / counts.sum()

    return [name for name in cat_columns if _unique_share(name) < max_prop_unique]
In [11]:
def fe_cat_cardinality_classification(data,cat_columns,N_lim=10):
    """Split categorical columns into low- and high-cardinality groups.

    A column goes to the low-cardinality list when its number of distinct
    values (as reported by ``Series.unique()``, so a missing value counts as
    one more level) is strictly below ``N_lim``; otherwise it goes to the
    high-cardinality list.

    Returns ``(low_cardinality_columns, high_cardinality_columns)``, each a
    list of column names in the order of ``cat_columns``.
    """
    subset = data[cat_columns]
    low, high = [], []

    for name in cat_columns:
        n_levels = len(subset[name].unique())
        bucket = low if n_levels < N_lim else high
        bucket.append(name)

    return low, high
In [12]:
def data_preprocessor(data,params):
    """Return boolean column masks for each feature group of ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw feature frame.
    params : sequence of three values
        ``[prop_nan, max_prop_unique, N_lim]``, forwarded to
        ``feat_classif_clean_nan``, ``fe_cat_clean_too_high_cardinality`` and
        ``fe_cat_cardinality_classification`` respectively.

    Returns
    -------
    list of three numpy bool arrays, each as long as ``data.columns``:
    ``[numeric mask, low-cardinality categorical mask,
    high-cardinality categorical mask]``.
    """
    prop_nan, max_prop_unique, N_lim = params
    frame = data.copy()  # work on a private copy of the caller's frame

    def _mask(selected):
        # True at the positions of the columns named in ``selected``.
        return np.array([name in selected for name in frame.columns], dtype=bool)

    num_cols, cat_cols_raw = feat_classif_clean_nan(frame, prop_nan=prop_nan)
    cat_cols = fe_cat_clean_too_high_cardinality(
        frame, cat_cols_raw, max_prop_unique=max_prop_unique)
    low_card_cols, high_card_cols = fe_cat_cardinality_classification(
        frame, cat_cols, N_lim=N_lim)

    return [_mask(num_cols), _mask(low_card_cols), _mask(high_card_cols)]
In [13]:
class MyCatMergerNaInputer(BaseEstimator, TransformerMixin):
    """Fill missing categorical values and merge rare levels into one.

    Parameters
    ----------
    occurrence_med_prop : float
        A level is considered rare when its count divided by the median
        level count of its column is strictly below this value.
    fill_value : str
        Replacement for missing values; it becomes a level of its own.

    After ``fit`` the attribute ``var_cat_dict`` maps each column label to
    the index of its rare levels. ``transform`` replaces those levels with
    the string ``'<column>_other_cat'`` and returns a 2-D array of strings.
    """

    def __init__(self,occurrence_med_prop = 0.2,fill_value='NaN'):
        self.fill_value = fill_value
        # Kept for backward compatibility; the real content is set by fit().
        self.var_cat_dict = {}
        self.occurrence_med_prop = occurrence_med_prop

    def _to_str_frame(self, X):
        # Shared preprocessing of fit/transform: fill NaN, then cast to str.
        df = pd.DataFrame(X)
        df = df.fillna(self.fill_value)
        return pd.DataFrame(df,dtype=str)

    def fit(self,X,y=None):
        """Learn the rare levels of every column of ``X``; returns ``self``."""
        df = self._to_str_frame(X)

        # Build a fresh mapping so a refit never keeps stale columns
        # from an earlier fit.
        rare_levels = {}
        for col in df.columns:
            level_counts = df[col].value_counts()
            lvl_c_median_norm = level_counts/level_counts.median()
            rare_levels[col] = lvl_c_median_norm[lvl_c_median_norm < self.occurrence_med_prop].index
        self.var_cat_dict = rare_levels

        return self

    def transform(self,X,y=None):
        """Return ``X`` as strings with NaN filled and rare levels merged."""
        df = self._to_str_frame(X)

        for col in df.columns:
            is_rare = df[col].isin(self.var_cat_dict[col])
            # Keep regular levels, replace rare ones by a per-column label.
            df[col] = df[col].where(~is_rare, f'{col}_other_cat')

        return df.values
In [14]:
def transformation(low_level_transformers,indices_lists):
    """Assemble the full feature transformer as a nested ``FeatureUnion``.

    Parameters
    ----------
    low_level_transformers : sequence of six objects
        ``[numeric_imputer, numeric_scaler, cat_imputer1, cat_imputer2,
        low_cardinality_cat_transformer, high_cardinality_cat_transformer]``.
    indices_lists : sequence of three column masks
        ``[numeric, low-cardinality categorical, high-cardinality
        categorical]``, used to slice the columns of the 2-D input array.

    Returns
    -------
    sklearn ``FeatureUnion`` with a numeric branch (select -> fill_nan ->
    scaling) and a categorical union of two branches (select -> fill_nan ->
    encoder), one per cardinality group.
    """
    (numeric_imputer, numeric_scaler, cat_imputer1, cat_imputer2,
     low_cardinality_cat_transformer,
     high_cardinality_cat_transformer) = low_level_transformers
    (numeric_data_indices, low_card_cat_data_indices,
     high_card_cat_data_indices) = indices_lists

    def _select(mask):
        # Transformer that keeps only the columns flagged in ``mask``.
        return preprocessing.FunctionTransformer(lambda data: data[:, mask])

    numeric_branch = pipeline.Pipeline(steps=[
        ('selecting', _select(numeric_data_indices)),
        ('fill_nan', numeric_imputer),
        ('scaling', numeric_scaler),
    ])

    # number of categories < N_lim
    low_card_branch = pipeline.Pipeline(steps=[
        ('selecting', _select(low_card_cat_data_indices)),
        ('fill_nan', cat_imputer1),
        ('low_cardinality_cat_transformer', low_cardinality_cat_transformer),
    ])

    # number of categories >= N_lim
    high_card_branch = pipeline.Pipeline(steps=[
        ('selecting', _select(high_card_cat_data_indices)),
        ('fill_nan', cat_imputer2),
        ('high_cardinality_cat_transformer', high_cardinality_cat_transformer),
    ])

    categorical_branch = pipeline.FeatureUnion(transformer_list=[
        ('low_cardinality_transformation', low_card_branch),
        ('high_cardinality_transformation', high_card_branch),
    ])

    return pipeline.FeatureUnion(transformer_list=[
        ('numeric_features_processing', numeric_branch),
        ('cat_features_processing', categorical_branch),
    ])
In [15]:
class Estimator(BaseEstimator, TransformerMixin):
    """Chain a feature transformer with a classifier behind one interface.

    Parameters
    ----------
    transformer : object with ``fit(X, y)`` and ``transform(X)``
    classifier : object with ``fit``, ``predict`` and ``predict_proba``

    Every method first passes ``X`` through ``transformer.transform`` and
    then delegates to ``classifier``.
    """

    def __init__(self, transformer,classifier):
        self.transformer = transformer
        self.classifier = classifier

    def fit(self, X, y):
        """Fit the transformer, then the classifier on the transformed data.

        Returns ``self`` so that chained calls such as
        ``est.fit(X, y).predict(X)`` go through the transformer. Previously
        the inner classifier was returned, which bypassed the transformer;
        the fitted classifier remains available as ``self.classifier``.
        """
        self.transformer.fit(X,y)
        X_tr = self.transformer.transform(X)
        self.classifier.fit(X_tr,y)
        return self

    def predict(self, X):
        """Return the classifier's predictions for transformed ``X``."""
        X_tr = self.transformer.transform(X)
        return self.classifier.predict(X_tr)

    def predict_proba(self,X):
        """Return the classifier's class probabilities for transformed ``X``."""
        X_tr = self.transformer.transform(X)
        return self.classifier.predict_proba(X_tr)
In [16]:
def pr_plot(y_true,probs, thrshs):
    """Plot precision and recall as functions of the decision threshold.

    Parameters
    ----------
    y_true : true labels; the predictions built here use 1 for the positive
        class and -1 for the negative class.
    probs : 2-D array whose column 1 holds the positive-class score.
    thrshs : iterable of thresholds; a score strictly above the threshold
        is predicted as 1, anything else as -1.
    """
    positive_scores = probs[:, 1]
    precisions, recalls = [], []

    for cutoff in thrshs:
        y_pred = np.where(positive_scores > cutoff, 1, -1)
        precisions.append(precision_score(y_true, y_pred))
        recalls.append(recall_score(y_true, y_pred))

    plt.plot(thrshs, precisions, label='precision')
    plt.plot(thrshs, recalls, label='recall')
    plt.xlabel('threshold')
    plt.ylabel('metric')
    plt.legend()
    plt.title('Зависимость значений метрик от порога')
    plt.grid(True)
In [17]:
def write_to_submission_file(predicted_labels, out_file,
                             target='result', index_label="Id"):
    """Save predictions as a CSV submission file.

    ``predicted_labels`` (an array with a ``shape`` attribute) becomes the
    single column ``target``; rows are numbered from 0 and the index column
    is written under the header ``index_label``. ``out_file`` is passed
    straight to ``DataFrame.to_csv``.
    """
    row_ids = pd.RangeIndex(predicted_labels.shape[0])
    submission = pd.DataFrame(predicted_labels, index=row_ids, columns=[target])
    submission.to_csv(out_file, index_label=index_label)
In [18]:
def cat_prep(data,cat_columns):
  """Return a copy of ``data`` with every column in ``cat_columns`` cast to str.

  The input frame is left untouched.
  """
  return data.astype({col: str for col in cat_columns})
In [ ]:
# Column masks of the features kept after preprocessing
indices_lists = data_preprocessor(feats_train,params=[0.7, 0.5, 30])# params = [prop_nan,max_prop_unique,N_lim]


# Missing-value imputation
# Numeric features
numeric_imputer = impute.SimpleImputer()# fills with the column mean by default
numeric_scaler = None  # None is passed as the 'scaling' step (no scaler object)

# Categorical features
occurrence_med_prop = 0.1# category size as a share of the feature's median category size; smaller categories are merged into one
cat_imputer1 =  MyCatMergerNaInputer(occurrence_med_prop = occurrence_med_prop,fill_value='Hi')
cat_imputer2 =  MyCatMergerNaInputer(occurrence_med_prop = occurrence_med_prop,fill_value='Hi')

# Encoders for the categorical features (one-hot for both cardinality groups)
(low_cardinality_cat_transformer1,high_cardinality_cat_transformer1) = (preprocessing.OneHotEncoder(handle_unknown = 'ignore'),
                                                                        preprocessing.OneHotEncoder(handle_unknown = 'ignore'))
                                                                        
# List of low-level transformers, in the order expected by transformation()
low_level_transformers = [numeric_imputer,numeric_scaler,cat_imputer1,cat_imputer2,\
                low_cardinality_cat_transformer1,high_cardinality_cat_transformer1]
# Final transformer
transformer = transformation(low_level_transformers,indices_lists)
In [ ]:
# Classifier and the final algorithm (transformer + classifier)
clf = xgb.XGBClassifier(random_state=42,n_estimators=100,gamma=0.1,max_depth=3,reg_alpha=1,min_child_weight=1)
    
estimator = Estimator(transformer,clf)
In [ ]:
# Silence all warnings for this cell.
warnings.filterwarnings('ignore')

# Fit on the training split. NOTE(review): labels_train is a one-column
# DataFrame, so .values is 2-D (n, 1) - confirm the classifier accepts that.
estimator.fit(feats_train.values,labels_train.values)

# Class probabilities on the validation split; column 1 is used as the score.
prb = estimator.predict_proba(feats_val.values)                                                                          
                                                                
print(f'ROC AUC: {roc_auc_score(labels_val.values,prb[:,1])}')    
print(f'PRC AUC: {average_precision_score(labels_val.values,prb[:,1])}')   
                                                                        
# Hard predictions and the per-class report.
pred = estimator.predict(feats_val.values)
print(classification_report(labels_val.values,pred))  
print('------------------')
print('------------------')

# Precision / recall against a grid of decision thresholds.
pr_plot(labels_val.values,prb,[0.05,0.1,0.2,0.25,0.3,0.35,0.4,0.5,0.6,0.7,0.8,0.9,0.95,0.96,0.97,0.99])
ROC AUC: 0.7378596430063221
PRC AUC: 0.2077076585475081
              precision    recall  f1-score   support

          -1       0.93      1.00      0.96     11107
           1       0.55      0.01      0.02       893

    accuracy                           0.93     12000
   macro avg       0.74      0.51      0.49     12000
weighted avg       0.90      0.93      0.89     12000

------------------
------------------
In [20]:
# Indices of the informative features after preprocessing.
# params = [prop_nan, max_prop_unique, N_lim]
num_inds, cat_indices1, cat_indices2 = data_preprocessor(
    feats_train,
    params=[0.7, 0.5, 30],
)
In [21]:
# Column names of both categorical index groups: first group, then second.
cat_columns = (
    np.array(features.columns)[cat_indices1].tolist()
    + np.array(features.columns)[cat_indices2].tolist()
)
print(cat_columns)
['Var7', 'Var35', 'Var44', 'Var65', 'Var72', 'Var78', 'Var132', 'Var143', 'Var144', 'Var173', 'Var181', 'Var195', 'Var196', 'Var203', 'Var205', 'Var206', 'Var207', 'Var208', 'Var210', 'Var211', 'Var218', 'Var219', 'Var221', 'Var223', 'Var225', 'Var226', 'Var227', 'Var229', 'Var192', 'Var193', 'Var197', 'Var198', 'Var199', 'Var202', 'Var204', 'Var212', 'Var216', 'Var217', 'Var220', 'Var222', 'Var228']
In [22]:
# Column names addressed by the numeric index list from `data_preprocessor`.
num_columns = np.array(features.columns)[num_inds].tolist()
print(num_columns)
['Var6', 'Var13', 'Var21', 'Var22', 'Var24', 'Var25', 'Var28', 'Var38', 'Var57', 'Var73', 'Var74', 'Var76', 'Var81', 'Var83', 'Var85', 'Var94', 'Var109', 'Var112', 'Var113', 'Var119', 'Var123', 'Var125', 'Var126', 'Var133', 'Var134', 'Var140', 'Var149', 'Var153', 'Var160', 'Var163', 'Var189']
In [23]:
# All retained feature names: numeric columns first, categorical columns after.
good_columns = [*num_columns, *cat_columns]
In [ ]:
# CatBoost model trained on GPU, with AUC as the tracked evaluation metric.
estimator_cb = CatBoostClassifier(
    task_type='GPU',
    random_state=0,
    iterations=500,
    learning_rate=0.08,
    eval_metric='AUC',
    # Row sampling: Bernoulli bootstrap with subsample=0.8.
    bootstrap_type='Bernoulli',
    subsample=0.8,
    # Categorical-feature and leaf-value settings.
    one_hot_max_size=10,
    max_ctr_complexity=4,
    leaf_estimation_iterations=10,
)
In [ ]:
# Train on the training split with the validation split as eval_set;
# progress is logged every 10th iteration and plotted.
estimator_cb.fit(
    cat_prep(feats_train[good_columns], cat_columns),
    labels_train,
    cat_features=cat_columns,
    eval_set=(cat_prep(feats_val[good_columns], cat_columns), labels_val),
    verbose=10,
    plot=True,
)
0:	learn: 0.5447010	test: 0.5331667	best: 0.5331667 (0)	total: 136ms	remaining: 1m 7s
10:	learn: 0.5710723	test: 0.5727358	best: 0.5727358 (10)	total: 899ms	remaining: 40s
20:	learn: 0.5846300	test: 0.5855039	best: 0.5855039 (20)	total: 1.65s	remaining: 37.6s
30:	learn: 0.6770678	test: 0.6701294	best: 0.6701294 (30)	total: 2.45s	remaining: 37s
40:	learn: 0.7173511	test: 0.7078972	best: 0.7078972 (40)	total: 3.29s	remaining: 36.9s
50:	learn: 0.7262440	test: 0.7148843	best: 0.7148843 (50)	total: 4.15s	remaining: 36.5s
60:	learn: 0.7315806	test: 0.7184889	best: 0.7185946 (57)	total: 5.03s	remaining: 36.2s
70:	learn: 0.7366703	test: 0.7222066	best: 0.7223936 (67)	total: 5.9s	remaining: 35.7s
80:	learn: 0.7419034	test: 0.7254476	best: 0.7257717 (79)	total: 6.76s	remaining: 35s
90:	learn: 0.7466782	test: 0.7281565	best: 0.7283261 (89)	total: 7.57s	remaining: 34s
100:	learn: 0.7488860	test: 0.7292999	best: 0.7293321 (99)	total: 8.47s	remaining: 33.5s
110:	learn: 0.7522508	test: 0.7307438	best: 0.7307438 (110)	total: 9.31s	remaining: 32.6s
120:	learn: 0.7536407	test: 0.7312691	best: 0.7313125 (119)	total: 10.1s	remaining: 31.7s
130:	learn: 0.7552121	test: 0.7314820	best: 0.7319006 (126)	total: 11s	remaining: 30.9s
140:	learn: 0.7565903	test: 0.7323825	best: 0.7323825 (140)	total: 11.8s	remaining: 29.9s
150:	learn: 0.7579563	test: 0.7325054	best: 0.7325125 (142)	total: 12.6s	remaining: 29s
160:	learn: 0.7588524	test: 0.7319401	best: 0.7325125 (142)	total: 13.4s	remaining: 28.2s
170:	learn: 0.7594942	test: 0.7319781	best: 0.7325125 (142)	total: 14.2s	remaining: 27.3s
180:	learn: 0.7605939	test: 0.7318507	best: 0.7325125 (142)	total: 15s	remaining: 26.4s
190:	learn: 0.7613096	test: 0.7318358	best: 0.7325125 (142)	total: 15.8s	remaining: 25.6s
200:	learn: 0.7620052	test: 0.7316928	best: 0.7325125 (142)	total: 16.6s	remaining: 24.7s
210:	learn: 0.7633407	test: 0.7315593	best: 0.7325125 (142)	total: 17.4s	remaining: 23.9s
220:	learn: 0.7643440	test: 0.7314563	best: 0.7325125 (142)	total: 18.3s	remaining: 23.1s
230:	learn: 0.7654004	test: 0.7316696	best: 0.7325125 (142)	total: 19.1s	remaining: 22.2s
240:	learn: 0.7662393	test: 0.7318971	best: 0.7325125 (142)	total: 19.8s	remaining: 21.3s
250:	learn: 0.7668014	test: 0.7318707	best: 0.7325125 (142)	total: 20.6s	remaining: 20.4s
260:	learn: 0.7678202	test: 0.7310467	best: 0.7325125 (142)	total: 21.4s	remaining: 19.6s
270:	learn: 0.7680104	test: 0.7310359	best: 0.7325125 (142)	total: 22.2s	remaining: 18.8s
280:	learn: 0.7682319	test: 0.7308916	best: 0.7325125 (142)	total: 22.9s	remaining: 17.9s
290:	learn: 0.7699705	test: 0.7318301	best: 0.7325125 (142)	total: 23.7s	remaining: 17s
300:	learn: 0.7712737	test: 0.7327570	best: 0.7327570 (300)	total: 24.5s	remaining: 16.2s
310:	learn: 0.7714523	test: 0.7326469	best: 0.7327570 (300)	total: 25.2s	remaining: 15.3s
320:	learn: 0.7719740	test: 0.7327244	best: 0.7327570 (300)	total: 26s	remaining: 14.5s
330:	learn: 0.7721339	test: 0.7328455	best: 0.7328545 (325)	total: 26.8s	remaining: 13.7s
340:	learn: 0.7722789	test: 0.7325192	best: 0.7328545 (325)	total: 27.4s	remaining: 12.8s
350:	learn: 0.7733998	test: 0.7327771	best: 0.7329434 (346)	total: 28.2s	remaining: 12s
360:	learn: 0.7736489	test: 0.7325862	best: 0.7329434 (346)	total: 29s	remaining: 11.2s
370:	learn: 0.7741174	test: 0.7325875	best: 0.7329434 (346)	total: 29.7s	remaining: 10.3s
380:	learn: 0.7750363	test: 0.7325769	best: 0.7329434 (346)	total: 30.5s	remaining: 9.52s
390:	learn: 0.7758738	test: 0.7333636	best: 0.7334556 (385)	total: 31.3s	remaining: 8.72s
400:	learn: 0.7760988	test: 0.7330433	best: 0.7334556 (385)	total: 32s	remaining: 7.9s
410:	learn: 0.7765396	test: 0.7328946	best: 0.7334556 (385)	total: 32.6s	remaining: 7.07s
420:	learn: 0.7765758	test: 0.7329066	best: 0.7334556 (385)	total: 33.4s	remaining: 6.26s
430:	learn: 0.7776449	test: 0.7330043	best: 0.7334556 (385)	total: 34.2s	remaining: 5.48s
440:	learn: 0.7780870	test: 0.7323297	best: 0.7334556 (385)	total: 35s	remaining: 4.68s
450:	learn: 0.7781951	test: 0.7323304	best: 0.7334556 (385)	total: 35.7s	remaining: 3.88s
460:	learn: 0.7788358	test: 0.7325013	best: 0.7334556 (385)	total: 36.5s	remaining: 3.09s
470:	learn: 0.7794644	test: 0.7324306	best: 0.7334556 (385)	total: 37.3s	remaining: 2.3s
480:	learn: 0.7805959	test: 0.7327501	best: 0.7334556 (385)	total: 38s	remaining: 1.5s
490:	learn: 0.7814288	test: 0.7325267	best: 0.7334556 (385)	total: 38.9s	remaining: 712ms
499:	learn: 0.7818154	test: 0.7325134	best: 0.7334556 (385)	total: 39.5s	remaining: 0us
bestTest = 0.7334556282
bestIteration = 385
Shrink model to first 386 iterations.
Out[ ]:
<catboost.core.CatBoostClassifier at 0x7f6d37beeef0>
In [ ]:
# Same configuration with the iteration count fixed at 380
# (presumably taken from the best iteration of the validation run -- confirm).
estimator_cb = CatBoostClassifier(
    task_type='GPU',
    random_state=0,
    iterations=380,
    learning_rate=0.08,
    eval_metric='AUC',
    # Row sampling: Bernoulli bootstrap with subsample=0.8.
    bootstrap_type='Bernoulli',
    subsample=0.8,
    # Categorical-feature and leaf-value settings.
    one_hot_max_size=10,
    max_ctr_complexity=4,
    leaf_estimation_iterations=10,
)
In [ ]:
# Refit on the full labelled data (no eval_set), then score the test data.
estimator_cb.fit(
    cat_prep(features[good_columns], cat_columns),
    labels,
    cat_features=cat_columns,
    verbose=10,
    plot=True,
)
probs = estimator_cb.predict_proba(
    cat_prep(test_data[good_columns], cat_columns)
)
# Column 1 of the predicted probabilities goes into the submission file.
write_to_submission_file(probs[:, 1], out_file='submission_cb1.csv')
0:	learn: 0.5824234	total: 56.3ms	remaining: 21.3s
10:	learn: 0.5792802	total: 556ms	remaining: 18.7s
20:	learn: 0.6006237	total: 998ms	remaining: 17.1s
30:	learn: 0.6604410	total: 1.42s	remaining: 16s
40:	learn: 0.7027496	total: 1.88s	remaining: 15.5s
50:	learn: 0.7147275	total: 2.31s	remaining: 14.9s
60:	learn: 0.7282001	total: 2.8s	remaining: 14.6s
70:	learn: 0.7408890	total: 3.26s	remaining: 14.2s
80:	learn: 0.7498578	total: 3.73s	remaining: 13.8s
90:	learn: 0.7558098	total: 4.21s	remaining: 13.4s
100:	learn: 0.7601608	total: 4.67s	remaining: 12.9s
110:	learn: 0.7626576	total: 5.16s	remaining: 12.5s
120:	learn: 0.7670048	total: 5.62s	remaining: 12s
130:	learn: 0.7694613	total: 6.08s	remaining: 11.6s
140:	learn: 0.7733936	total: 6.54s	remaining: 11.1s
150:	learn: 0.7751738	total: 7.01s	remaining: 10.6s
160:	learn: 0.7777317	total: 7.46s	remaining: 10.1s
170:	learn: 0.7794589	total: 7.92s	remaining: 9.68s
180:	learn: 0.7819069	total: 8.39s	remaining: 9.22s
190:	learn: 0.7843453	total: 8.85s	remaining: 8.76s
200:	learn: 0.7861932	total: 9.32s	remaining: 8.3s
210:	learn: 0.7878673	total: 9.76s	remaining: 7.81s
220:	learn: 0.7903249	total: 10.2s	remaining: 7.36s
230:	learn: 0.7920935	total: 10.7s	remaining: 6.88s
240:	learn: 0.7936238	total: 11.2s	remaining: 6.43s
250:	learn: 0.7955011	total: 11.6s	remaining: 5.95s
260:	learn: 0.7969472	total: 12.1s	remaining: 5.5s
270:	learn: 0.7990060	total: 12.5s	remaining: 5.04s
280:	learn: 0.8004860	total: 13s	remaining: 4.57s
290:	learn: 0.8020282	total: 13.4s	remaining: 4.11s
300:	learn: 0.8042864	total: 13.9s	remaining: 3.65s
310:	learn: 0.8057619	total: 14.3s	remaining: 3.18s
320:	learn: 0.8070054	total: 14.8s	remaining: 2.72s
330:	learn: 0.8082265	total: 15.3s	remaining: 2.27s
340:	learn: 0.8095026	total: 15.7s	remaining: 1.8s
350:	learn: 0.8109970	total: 16.2s	remaining: 1.34s
360:	learn: 0.8121376	total: 16.7s	remaining: 878ms
370:	learn: 0.8133637	total: 17.1s	remaining: 416ms
379:	learn: 0.8142136	total: 17.6s	remaining: 0us
In [ ]:
# Bernoulli-bootstrap configuration with boosting_type='Ordered', 380 iterations.
estimator_cb = CatBoostClassifier(
    task_type='GPU',
    random_state=0,
    iterations=380,
    learning_rate=0.08,
    eval_metric='AUC',
    boosting_type='Ordered',
    # Row sampling: Bernoulli bootstrap with subsample=0.8.
    bootstrap_type='Bernoulli',
    subsample=0.8,
    # Categorical-feature and leaf-value settings.
    one_hot_max_size=10,
    max_ctr_complexity=4,
    leaf_estimation_iterations=10,
)

# Train on the training split with the validation split as eval_set.
estimator_cb.fit(
    cat_prep(feats_train[good_columns], cat_columns),
    labels_train,
    cat_features=cat_columns,
    eval_set=(cat_prep(feats_val[good_columns], cat_columns), labels_val),
    verbose=10,
    plot=True,
)
0:	learn: 0.5447010	test: 0.5331667	best: 0.5331667 (0)	total: 134ms	remaining: 51s
10:	learn: 0.5710723	test: 0.5727358	best: 0.5727358 (10)	total: 880ms	remaining: 29.5s
20:	learn: 0.5846300	test: 0.5855039	best: 0.5855039 (20)	total: 1.62s	remaining: 27.7s
30:	learn: 0.6770678	test: 0.6701294	best: 0.6701294 (30)	total: 2.42s	remaining: 27.2s
40:	learn: 0.7173511	test: 0.7078972	best: 0.7078972 (40)	total: 3.26s	remaining: 27s
50:	learn: 0.7262440	test: 0.7148843	best: 0.7148843 (50)	total: 4.09s	remaining: 26.4s
60:	learn: 0.7315806	test: 0.7184889	best: 0.7185946 (57)	total: 4.98s	remaining: 26.1s
70:	learn: 0.7366703	test: 0.7222066	best: 0.7223936 (67)	total: 5.87s	remaining: 25.5s
80:	learn: 0.7419034	test: 0.7254476	best: 0.7257717 (79)	total: 6.71s	remaining: 24.8s
90:	learn: 0.7466782	test: 0.7281565	best: 0.7283261 (89)	total: 7.51s	remaining: 23.9s
100:	learn: 0.7488860	test: 0.7292999	best: 0.7293321 (99)	total: 8.39s	remaining: 23.2s
110:	learn: 0.7522508	test: 0.7307438	best: 0.7307438 (110)	total: 9.21s	remaining: 22.3s
120:	learn: 0.7536407	test: 0.7312691	best: 0.7313125 (119)	total: 10s	remaining: 21.4s
130:	learn: 0.7552121	test: 0.7314820	best: 0.7319006 (126)	total: 10.8s	remaining: 20.6s
140:	learn: 0.7565903	test: 0.7323825	best: 0.7323825 (140)	total: 11.6s	remaining: 19.7s
150:	learn: 0.7579563	test: 0.7325054	best: 0.7325125 (142)	total: 12.4s	remaining: 18.9s
160:	learn: 0.7588524	test: 0.7319401	best: 0.7325125 (142)	total: 13.2s	remaining: 18s
170:	learn: 0.7594942	test: 0.7319781	best: 0.7325125 (142)	total: 14s	remaining: 17.1s
180:	learn: 0.7605939	test: 0.7318507	best: 0.7325125 (142)	total: 14.8s	remaining: 16.3s
190:	learn: 0.7613096	test: 0.7318358	best: 0.7325125 (142)	total: 15.6s	remaining: 15.4s
200:	learn: 0.7620052	test: 0.7316928	best: 0.7325125 (142)	total: 16.4s	remaining: 14.6s
210:	learn: 0.7633407	test: 0.7315593	best: 0.7325125 (142)	total: 17.2s	remaining: 13.7s
220:	learn: 0.7643440	test: 0.7314563	best: 0.7325125 (142)	total: 18s	remaining: 12.9s
230:	learn: 0.7654004	test: 0.7316696	best: 0.7325125 (142)	total: 18.8s	remaining: 12.1s
240:	learn: 0.7659970	test: 0.7316588	best: 0.7325125 (142)	total: 19.6s	remaining: 11.3s
250:	learn: 0.7666864	test: 0.7315628	best: 0.7325125 (142)	total: 20.3s	remaining: 10.5s
260:	learn: 0.7683918	test: 0.7317373	best: 0.7325125 (142)	total: 21.1s	remaining: 9.64s
270:	learn: 0.7694087	test: 0.7315161	best: 0.7325125 (142)	total: 22s	remaining: 8.84s
280:	learn: 0.7698049	test: 0.7314147	best: 0.7325125 (142)	total: 22.8s	remaining: 8.02s
290:	learn: 0.7705171	test: 0.7313753	best: 0.7325125 (142)	total: 23.5s	remaining: 7.18s
300:	learn: 0.7714718	test: 0.7320415	best: 0.7325125 (142)	total: 24.3s	remaining: 6.36s
310:	learn: 0.7718175	test: 0.7316291	best: 0.7325125 (142)	total: 24.9s	remaining: 5.53s
320:	learn: 0.7726133	test: 0.7314689	best: 0.7325125 (142)	total: 25.6s	remaining: 4.71s
330:	learn: 0.7730308	test: 0.7318829	best: 0.7325125 (142)	total: 26.5s	remaining: 3.92s
340:	learn: 0.7734050	test: 0.7315179	best: 0.7325125 (142)	total: 27.2s	remaining: 3.11s
350:	learn: 0.7746783	test: 0.7318955	best: 0.7325125 (142)	total: 28s	remaining: 2.31s
360:	learn: 0.7756497	test: 0.7320777	best: 0.7325125 (142)	total: 28.7s	remaining: 1.51s
370:	learn: 0.7761204	test: 0.7322873	best: 0.7325125 (142)	total: 29.5s	remaining: 715ms
379:	learn: 0.7766709	test: 0.7324679	best: 0.7325125 (142)	total: 30.1s	remaining: 0us
bestTest = 0.7325125337
bestIteration = 142
Shrink model to first 143 iterations.
Out[ ]:
<catboost.core.CatBoostClassifier at 0x7f6d36cf9a20>
In [ ]:
# Refit the current estimator on the full labelled data (no eval_set),
# then score the test data.
estimator_cb.fit(
    cat_prep(features[good_columns], cat_columns),
    labels,
    cat_features=cat_columns,
    verbose=10,
    plot=True,
)
probs = estimator_cb.predict_proba(
    cat_prep(test_data[good_columns], cat_columns)
)
# Column 1 of the predicted probabilities goes into the submission file.
write_to_submission_file(probs[:, 1], out_file='submission_cb2.csv')
0:	learn: 0.5667429	total: 106ms	remaining: 40.2s
10:	learn: 0.5781668	total: 977ms	remaining: 32.8s
20:	learn: 0.5862285	total: 1.75s	remaining: 29.9s
30:	learn: 0.6401380	total: 2.52s	remaining: 28.4s
40:	learn: 0.6948748	total: 3.31s	remaining: 27.4s
50:	learn: 0.7169213	total: 4.13s	remaining: 26.7s
60:	learn: 0.7254314	total: 4.89s	remaining: 25.6s
70:	learn: 0.7303921	total: 5.65s	remaining: 24.6s
80:	learn: 0.7370947	total: 6.46s	remaining: 23.9s
90:	learn: 0.7401665	total: 7.27s	remaining: 23.1s
100:	learn: 0.7447180	total: 8.07s	remaining: 22.3s
110:	learn: 0.7480026	total: 8.89s	remaining: 21.6s
120:	learn: 0.7501602	total: 9.68s	remaining: 20.7s
130:	learn: 0.7515897	total: 10.5s	remaining: 19.9s
140:	learn: 0.7535840	total: 11.3s	remaining: 19.1s
150:	learn: 0.7543607	total: 12.1s	remaining: 18.3s
160:	learn: 0.7553576	total: 12.9s	remaining: 17.5s
170:	learn: 0.7567571	total: 13.7s	remaining: 16.7s
180:	learn: 0.7576030	total: 14.4s	remaining: 15.9s
190:	learn: 0.7584188	total: 15.2s	remaining: 15.1s
200:	learn: 0.7590002	total: 16s	remaining: 14.2s
210:	learn: 0.7599187	total: 16.7s	remaining: 13.4s
220:	learn: 0.7607112	total: 17.4s	remaining: 12.5s
230:	learn: 0.7621620	total: 18.2s	remaining: 11.7s
240:	learn: 0.7631459	total: 18.9s	remaining: 10.9s
250:	learn: 0.7639785	total: 19.7s	remaining: 10.1s
260:	learn: 0.7649977	total: 20.5s	remaining: 9.34s
270:	learn: 0.7653381	total: 21.2s	remaining: 8.54s
280:	learn: 0.7655454	total: 21.9s	remaining: 7.72s
290:	learn: 0.7664025	total: 22.7s	remaining: 6.94s
300:	learn: 0.7670635	total: 23.5s	remaining: 6.16s
310:	learn: 0.7680891	total: 24.2s	remaining: 5.38s
320:	learn: 0.7686734	total: 24.9s	remaining: 4.58s
330:	learn: 0.7692505	total: 25.7s	remaining: 3.8s
340:	learn: 0.7696139	total: 26.4s	remaining: 3.02s
350:	learn: 0.7698284	total: 27.1s	remaining: 2.24s
360:	learn: 0.7700433	total: 27.8s	remaining: 1.46s
370:	learn: 0.7702186	total: 28.6s	remaining: 693ms
379:	learn: 0.7707325	total: 29.2s	remaining: 0us

image.png

In [ ]:
# Ordered boosting with a Bayesian bootstrap (bagging_temperature=1)
# in place of Bernoulli row subsampling.
estimator_cb = CatBoostClassifier(
    task_type='GPU',
    random_state=0,
    iterations=380,
    learning_rate=0.08,
    eval_metric='AUC',
    boosting_type='Ordered',
    bootstrap_type='Bayesian',
    bagging_temperature=1,
    # Categorical-feature and leaf-value settings.
    one_hot_max_size=10,
    max_ctr_complexity=4,
    leaf_estimation_iterations=10,
)

# Train on the training split with the validation split as eval_set.
estimator_cb.fit(
    cat_prep(feats_train[good_columns], cat_columns),
    labels_train,
    cat_features=cat_columns,
    eval_set=(cat_prep(feats_val[good_columns], cat_columns), labels_val),
    verbose=10,
    plot=True,
)
0:	learn: 0.5152289	test: 0.5102524	best: 0.5102524 (0)	total: 140ms	remaining: 53.2s
10:	learn: 0.5707900	test: 0.5767617	best: 0.5782275 (4)	total: 893ms	remaining: 30s
20:	learn: 0.5797552	test: 0.5831602	best: 0.5831602 (20)	total: 1.62s	remaining: 27.7s
30:	learn: 0.6400428	test: 0.6383710	best: 0.6383710 (30)	total: 2.38s	remaining: 26.8s
40:	learn: 0.7061993	test: 0.6974573	best: 0.6974573 (40)	total: 3.21s	remaining: 26.5s
50:	learn: 0.7128442	test: 0.7033105	best: 0.7033105 (50)	total: 3.98s	remaining: 25.7s
60:	learn: 0.7233586	test: 0.7124535	best: 0.7124535 (60)	total: 4.83s	remaining: 25.2s
70:	learn: 0.7312139	test: 0.7175669	best: 0.7175669 (70)	total: 5.64s	remaining: 24.5s
80:	learn: 0.7337432	test: 0.7182906	best: 0.7182906 (80)	total: 6.47s	remaining: 23.9s
90:	learn: 0.7364989	test: 0.7190194	best: 0.7190194 (90)	total: 7.27s	remaining: 23.1s
100:	learn: 0.7438051	test: 0.7246304	best: 0.7246304 (100)	total: 8.11s	remaining: 22.4s
110:	learn: 0.7456093	test: 0.7262747	best: 0.7263873 (109)	total: 8.93s	remaining: 21.6s
120:	learn: 0.7468942	test: 0.7273726	best: 0.7273726 (120)	total: 9.76s	remaining: 20.9s
130:	learn: 0.7496106	test: 0.7292648	best: 0.7293740 (124)	total: 10.6s	remaining: 20.1s
140:	learn: 0.7507899	test: 0.7298839	best: 0.7298839 (140)	total: 11.4s	remaining: 19.3s
150:	learn: 0.7518415	test: 0.7293288	best: 0.7299204 (141)	total: 12.2s	remaining: 18.6s
160:	learn: 0.7528600	test: 0.7298110	best: 0.7299204 (141)	total: 13s	remaining: 17.7s
170:	learn: 0.7538528	test: 0.7305916	best: 0.7305916 (170)	total: 13.8s	remaining: 16.9s
180:	learn: 0.7559686	test: 0.7310869	best: 0.7311427 (175)	total: 14.7s	remaining: 16.1s
190:	learn: 0.7566716	test: 0.7310762	best: 0.7313793 (184)	total: 15.5s	remaining: 15.3s
200:	learn: 0.7576790	test: 0.7308913	best: 0.7313793 (184)	total: 16.3s	remaining: 14.5s
210:	learn: 0.7587804	test: 0.7313124	best: 0.7315837 (202)	total: 17.1s	remaining: 13.7s
220:	learn: 0.7596022	test: 0.7310604	best: 0.7315837 (202)	total: 18s	remaining: 12.9s
230:	learn: 0.7608127	test: 0.7313342	best: 0.7315837 (202)	total: 18.7s	remaining: 12.1s
240:	learn: 0.7613612	test: 0.7314212	best: 0.7316327 (235)	total: 19.5s	remaining: 11.3s
250:	learn: 0.7623276	test: 0.7310435	best: 0.7316327 (235)	total: 20.3s	remaining: 10.4s
260:	learn: 0.7631220	test: 0.7307836	best: 0.7316327 (235)	total: 21.1s	remaining: 9.63s
270:	learn: 0.7643692	test: 0.7313931	best: 0.7316327 (235)	total: 21.9s	remaining: 8.83s
280:	learn: 0.7654957	test: 0.7307179	best: 0.7316327 (235)	total: 22.8s	remaining: 8.02s
290:	learn: 0.7669975	test: 0.7304918	best: 0.7316327 (235)	total: 23.6s	remaining: 7.21s
300:	learn: 0.7674364	test: 0.7303185	best: 0.7316327 (235)	total: 24.4s	remaining: 6.4s
310:	learn: 0.7686724	test: 0.7302678	best: 0.7316327 (235)	total: 25.2s	remaining: 5.58s
320:	learn: 0.7694972	test: 0.7302271	best: 0.7316327 (235)	total: 26s	remaining: 4.79s
330:	learn: 0.7701034	test: 0.7302092	best: 0.7316327 (235)	total: 26.8s	remaining: 3.97s
340:	learn: 0.7713680	test: 0.7305732	best: 0.7316327 (235)	total: 27.6s	remaining: 3.15s
350:	learn: 0.7720867	test: 0.7302030	best: 0.7316327 (235)	total: 28.4s	remaining: 2.34s
360:	learn: 0.7727383	test: 0.7304522	best: 0.7316327 (235)	total: 29.2s	remaining: 1.53s
370:	learn: 0.7734405	test: 0.7304238	best: 0.7316327 (235)	total: 30s	remaining: 728ms
379:	learn: 0.7741121	test: 0.7300493	best: 0.7316327 (235)	total: 30.7s	remaining: 0us
bestTest = 0.7316327095
bestIteration = 235
Shrink model to first 236 iterations.
Out[ ]:
<catboost.core.CatBoostClassifier at 0x7f6d5dcae438>
In [ ]:
# Refit the current estimator on the full labelled data (no eval_set),
# then score the test data.
estimator_cb.fit(
    cat_prep(features[good_columns], cat_columns),
    labels,
    cat_features=cat_columns,
    verbose=10,
    plot=True,
)
probs = estimator_cb.predict_proba(
    cat_prep(test_data[good_columns], cat_columns)
)
# Column 1 of the predicted probabilities goes into the submission file.
write_to_submission_file(probs[:, 1], out_file='submission_cb4.csv')
0:	learn: 0.5674290	total: 108ms	remaining: 41s
10:	learn: 0.5739045	total: 891ms	remaining: 29.9s
20:	learn: 0.5783136	total: 1.61s	remaining: 27.5s
30:	learn: 0.6362925	total: 2.35s	remaining: 26.5s
40:	learn: 0.6933148	total: 3.07s	remaining: 25.4s
50:	learn: 0.7102132	total: 3.88s	remaining: 25s
60:	learn: 0.7222548	total: 4.72s	remaining: 24.7s
70:	learn: 0.7263156	total: 5.54s	remaining: 24.1s
80:	learn: 0.7320451	total: 6.32s	remaining: 23.3s
90:	learn: 0.7333564	total: 7.12s	remaining: 22.6s
100:	learn: 0.7374036	total: 7.93s	remaining: 21.9s
110:	learn: 0.7411875	total: 8.77s	remaining: 21.2s
120:	learn: 0.7431314	total: 9.55s	remaining: 20.4s
130:	learn: 0.7448965	total: 10.3s	remaining: 19.6s
140:	learn: 0.7473896	total: 11.1s	remaining: 18.8s
150:	learn: 0.7481652	total: 11.9s	remaining: 18s
160:	learn: 0.7486160	total: 12.6s	remaining: 17.2s
170:	learn: 0.7495404	total: 13.5s	remaining: 16.4s
180:	learn: 0.7505172	total: 14.2s	remaining: 15.6s
190:	learn: 0.7511965	total: 15s	remaining: 14.8s
200:	learn: 0.7517535	total: 15.8s	remaining: 14.1s
210:	learn: 0.7532490	total: 16.6s	remaining: 13.3s
220:	learn: 0.7549111	total: 17.5s	remaining: 12.6s
230:	learn: 0.7574193	total: 18.3s	remaining: 11.8s
240:	learn: 0.7580765	total: 19s	remaining: 11s
250:	learn: 0.7586584	total: 19.8s	remaining: 10.2s
260:	learn: 0.7594667	total: 20.6s	remaining: 9.4s
270:	learn: 0.7606046	total: 21.4s	remaining: 8.63s
280:	learn: 0.7618772	total: 22.3s	remaining: 7.84s
290:	learn: 0.7630601	total: 23.1s	remaining: 7.05s
300:	learn: 0.7638532	total: 23.8s	remaining: 6.25s
310:	learn: 0.7646019	total: 24.6s	remaining: 5.46s
320:	learn: 0.7655818	total: 25.3s	remaining: 4.65s
330:	learn: 0.7660613	total: 26.1s	remaining: 3.86s
340:	learn: 0.7669093	total: 26.9s	remaining: 3.07s
350:	learn: 0.7675483	total: 27.6s	remaining: 2.28s
360:	learn: 0.7682363	total: 28.4s	remaining: 1.49s
370:	learn: 0.7688113	total: 29.1s	remaining: 706ms
379:	learn: 0.7694793	total: 29.8s	remaining: 0us

image.png

In [ ]:
# Ordered boosting with Bernoulli bootstrap, learning rate 0.04, 504 iterations.
estimator_cb = CatBoostClassifier(
    task_type='GPU',
    random_state=0,
    iterations=504,
    learning_rate=0.04,
    eval_metric='AUC',
    boosting_type='Ordered',
    # Row sampling: Bernoulli bootstrap with subsample=0.8.
    bootstrap_type='Bernoulli',
    subsample=0.8,
    # Categorical-feature and leaf-value settings.
    one_hot_max_size=10,
    max_ctr_complexity=4,
    leaf_estimation_iterations=10,
)

# Train on the training split with the validation split as eval_set.
estimator_cb.fit(
    cat_prep(feats_train[good_columns], cat_columns),
    labels_train,
    cat_features=cat_columns,
    eval_set=(cat_prep(feats_val[good_columns], cat_columns), labels_val),
    verbose=10,
    plot=True,
)
0:	learn: 0.5447010	test: 0.5331667	best: 0.5331667 (0)	total: 142ms	remaining: 1m 11s
10:	learn: 0.5708398	test: 0.5721746	best: 0.5762741 (4)	total: 873ms	remaining: 39.1s
20:	learn: 0.5729061	test: 0.5793588	best: 0.5793588 (20)	total: 1.6s	remaining: 36.8s
30:	learn: 0.5752570	test: 0.5782732	best: 0.5793588 (20)	total: 2.34s	remaining: 35.6s
40:	learn: 0.5784188	test: 0.5819761	best: 0.5819761 (40)	total: 3.13s	remaining: 35.3s
50:	learn: 0.5976739	test: 0.5986293	best: 0.5986293 (50)	total: 3.89s	remaining: 34.6s
60:	learn: 0.6605894	test: 0.6573376	best: 0.6573376 (60)	total: 4.64s	remaining: 33.7s
70:	learn: 0.6971366	test: 0.6925094	best: 0.6925094 (70)	total: 5.49s	remaining: 33.5s
80:	learn: 0.7124468	test: 0.7041591	best: 0.7041591 (80)	total: 6.3s	remaining: 32.9s
90:	learn: 0.7182024	test: 0.7084746	best: 0.7084746 (90)	total: 7.14s	remaining: 32.4s
100:	learn: 0.7230836	test: 0.7121795	best: 0.7121795 (100)	total: 7.93s	remaining: 31.6s
110:	learn: 0.7272401	test: 0.7151254	best: 0.7151254 (110)	total: 8.79s	remaining: 31.1s
120:	learn: 0.7328594	test: 0.7195209	best: 0.7195209 (120)	total: 9.67s	remaining: 30.6s
130:	learn: 0.7367095	test: 0.7222172	best: 0.7222172 (130)	total: 10.5s	remaining: 29.9s
140:	learn: 0.7412894	test: 0.7248486	best: 0.7248486 (140)	total: 11.3s	remaining: 29.2s
150:	learn: 0.7435853	test: 0.7267115	best: 0.7268445 (149)	total: 12.2s	remaining: 28.5s
160:	learn: 0.7461034	test: 0.7280468	best: 0.7280468 (160)	total: 13s	remaining: 27.7s
170:	learn: 0.7485957	test: 0.7297958	best: 0.7297958 (170)	total: 13.9s	remaining: 27s
180:	learn: 0.7500162	test: 0.7308028	best: 0.7308200 (179)	total: 14.7s	remaining: 26.2s
190:	learn: 0.7512993	test: 0.7315567	best: 0.7315956 (189)	total: 15.5s	remaining: 25.5s
200:	learn: 0.7520395	test: 0.7318552	best: 0.7318859 (198)	total: 16.4s	remaining: 24.8s
210:	learn: 0.7525786	test: 0.7322771	best: 0.7322771 (210)	total: 17.3s	remaining: 24s
220:	learn: 0.7540899	test: 0.7328164	best: 0.7328863 (213)	total: 18.1s	remaining: 23.2s
230:	learn: 0.7556255	test: 0.7336850	best: 0.7336850 (230)	total: 19s	remaining: 22.4s
240:	learn: 0.7561578	test: 0.7337858	best: 0.7337858 (240)	total: 19.8s	remaining: 21.6s
250:	learn: 0.7565636	test: 0.7337554	best: 0.7338285 (242)	total: 20.6s	remaining: 20.8s
260:	learn: 0.7574094	test: 0.7339690	best: 0.7339978 (257)	total: 21.5s	remaining: 20s
270:	learn: 0.7578563	test: 0.7339549	best: 0.7339978 (257)	total: 22.2s	remaining: 19.1s
280:	learn: 0.7588045	test: 0.7340691	best: 0.7340885 (278)	total: 23s	remaining: 18.3s
290:	learn: 0.7595987	test: 0.7345313	best: 0.7345313 (290)	total: 23.8s	remaining: 17.5s
300:	learn: 0.7600190	test: 0.7345240	best: 0.7346933 (294)	total: 24.7s	remaining: 16.6s
310:	learn: 0.7604388	test: 0.7346696	best: 0.7346933 (294)	total: 25.4s	remaining: 15.8s
320:	learn: 0.7610967	test: 0.7346640	best: 0.7347733 (315)	total: 26.2s	remaining: 14.9s
330:	learn: 0.7613870	test: 0.7347122	best: 0.7347733 (315)	total: 27s	remaining: 14.1s
340:	learn: 0.7615469	test: 0.7348806	best: 0.7349229 (338)	total: 27.7s	remaining: 13.3s
350:	learn: 0.7621936	test: 0.7350282	best: 0.7350891 (347)	total: 28.5s	remaining: 12.4s
360:	learn: 0.7626764	test: 0.7353082	best: 0.7353196 (358)	total: 29.3s	remaining: 11.6s
370:	learn: 0.7637020	test: 0.7359284	best: 0.7359323 (366)	total: 30.1s	remaining: 10.8s
380:	learn: 0.7641343	test: 0.7357811	best: 0.7359323 (366)	total: 30.9s	remaining: 9.97s
390:	learn: 0.7648495	test: 0.7356860	best: 0.7359323 (366)	total: 31.6s	remaining: 9.15s
400:	learn: 0.7654328	test: 0.7359188	best: 0.7359682 (398)	total: 32.5s	remaining: 8.34s
410:	learn: 0.7658461	test: 0.7357125	best: 0.7359682 (398)	total: 33.2s	remaining: 7.52s
420:	learn: 0.7660944	test: 0.7357632	best: 0.7359682 (398)	total: 34s	remaining: 6.7s
430:	learn: 0.7664774	test: 0.7356946	best: 0.7359682 (398)	total: 34.7s	remaining: 5.88s
440:	learn: 0.7668612	test: 0.7354910	best: 0.7359682 (398)	total: 35.6s	remaining: 5.08s
450:	learn: 0.7672439	test: 0.7354954	best: 0.7359682 (398)	total: 36.3s	remaining: 4.27s
460:	learn: 0.7677695	test: 0.7357261	best: 0.7359682 (398)	total: 37.1s	remaining: 3.46s
470:	learn: 0.7681839	test: 0.7359704	best: 0.7360052 (468)	total: 37.9s	remaining: 2.65s
480:	learn: 0.7684728	test: 0.7361467	best: 0.7361467 (480)	total: 38.6s	remaining: 1.85s
490:	learn: 0.7690362	test: 0.7360612	best: 0.7361467 (480)	total: 39.4s	remaining: 1.04s
500:	learn: 0.7692813	test: 0.7362040	best: 0.7362552 (498)	total: 40.2s	remaining: 241ms
503:	learn: 0.7693897	test: 0.7363423	best: 0.7363518 (501)	total: 40.4s	remaining: 0us
bestTest = 0.736351788
bestIteration = 501
Shrink model to first 502 iterations.
Out[ ]:
<catboost.core.CatBoostClassifier at 0x7f6d373c8860>
In [ ]:
# Ordered boosting with Bernoulli bootstrap, learning rate 0.04,
# explicit tree depth of 7 and 446 iterations.
estimator_cb = CatBoostClassifier(
    task_type='GPU',
    random_state=0,
    iterations=446,
    learning_rate=0.04,
    depth=7,
    eval_metric='AUC',
    boosting_type='Ordered',
    # Row sampling: Bernoulli bootstrap with subsample=0.8.
    bootstrap_type='Bernoulli',
    subsample=0.8,
    # Categorical-feature and leaf-value settings.
    one_hot_max_size=10,
    max_ctr_complexity=4,
    leaf_estimation_iterations=10,
)

# Train on the training split with the validation split as eval_set.
estimator_cb.fit(
    cat_prep(feats_train[good_columns], cat_columns),
    labels_train,
    cat_features=cat_columns,
    eval_set=(cat_prep(feats_val[good_columns], cat_columns), labels_val),
    verbose=10,
    plot=True,
)
0:	learn: 0.5447010	test: 0.5331667	best: 0.5331667 (0)	total: 154ms	remaining: 1m 8s
10:	learn: 0.5704298	test: 0.5709307	best: 0.5710093 (9)	total: 1.16s	remaining: 45.7s
20:	learn: 0.5699875	test: 0.5744098	best: 0.5759337 (16)	total: 2.02s	remaining: 40.9s
30:	learn: 0.5742719	test: 0.5760092	best: 0.5763202 (29)	total: 2.93s	remaining: 39.2s
40:	learn: 0.5761983	test: 0.5776572	best: 0.5787051 (38)	total: 3.82s	remaining: 37.7s
50:	learn: 0.5934804	test: 0.5895339	best: 0.5895339 (50)	total: 4.66s	remaining: 36.1s
60:	learn: 0.6559763	test: 0.6537890	best: 0.6537890 (60)	total: 5.75s	remaining: 36.3s
70:	learn: 0.6972478	test: 0.6934958	best: 0.6934958 (70)	total: 6.88s	remaining: 36.3s
80:	learn: 0.7119675	test: 0.7052441	best: 0.7052441 (80)	total: 7.92s	remaining: 35.7s
90:	learn: 0.7193804	test: 0.7101532	best: 0.7101532 (90)	total: 9.02s	remaining: 35.2s
100:	learn: 0.7298220	test: 0.7177342	best: 0.7177342 (100)	total: 10.2s	remaining: 34.9s
110:	learn: 0.7331260	test: 0.7203243	best: 0.7203243 (110)	total: 11.4s	remaining: 34.3s
120:	learn: 0.7354386	test: 0.7212843	best: 0.7215035 (117)	total: 12.5s	remaining: 33.7s
130:	learn: 0.7370118	test: 0.7225661	best: 0.7226961 (126)	total: 13.7s	remaining: 32.8s
140:	learn: 0.7391624	test: 0.7236302	best: 0.7237091 (136)	total: 14.7s	remaining: 31.9s
150:	learn: 0.7424835	test: 0.7256604	best: 0.7256604 (150)	total: 15.9s	remaining: 31.1s
160:	learn: 0.7448289	test: 0.7273459	best: 0.7273459 (160)	total: 17s	remaining: 30.2s
170:	learn: 0.7464763	test: 0.7284009	best: 0.7284009 (170)	total: 18.2s	remaining: 29.3s
180:	learn: 0.7480460	test: 0.7290141	best: 0.7292643 (177)	total: 19.5s	remaining: 28.5s
190:	learn: 0.7491195	test: 0.7293502	best: 0.7294690 (184)	total: 20.6s	remaining: 27.5s
200:	learn: 0.7514631	test: 0.7308407	best: 0.7309477 (197)	total: 21.8s	remaining: 26.6s
210:	learn: 0.7535800	test: 0.7318327	best: 0.7318327 (210)	total: 22.9s	remaining: 25.6s
220:	learn: 0.7545129	test: 0.7321290	best: 0.7321638 (218)	total: 24.1s	remaining: 24.5s
230:	learn: 0.7552478	test: 0.7325428	best: 0.7326999 (228)	total: 25.2s	remaining: 23.4s
240:	learn: 0.7556182	test: 0.7328473	best: 0.7328798 (239)	total: 26.1s	remaining: 22.2s
250:	learn: 0.7564428	test: 0.7329701	best: 0.7329772 (244)	total: 27.2s	remaining: 21.1s
260:	learn: 0.7572416	test: 0.7331524	best: 0.7332043 (259)	total: 28.3s	remaining: 20.1s
270:	learn: 0.7576939	test: 0.7331907	best: 0.7332520 (267)	total: 29.4s	remaining: 19s
280:	learn: 0.7578469	test: 0.7333230	best: 0.7333230 (280)	total: 30.4s	remaining: 17.8s
290:	learn: 0.7582467	test: 0.7335650	best: 0.7335943 (284)	total: 31.4s	remaining: 16.7s
300:	learn: 0.7585591	test: 0.7334042	best: 0.7335943 (284)	total: 32.4s	remaining: 15.6s
310:	learn: 0.7598663	test: 0.7338965	best: 0.7338965 (310)	total: 33.5s	remaining: 14.5s
320:	learn: 0.7601244	test: 0.7337751	best: 0.7339044 (311)	total: 34.6s	remaining: 13.5s
330:	learn: 0.7608018	test: 0.7343054	best: 0.7343054 (330)	total: 35.6s	remaining: 12.4s
340:	learn: 0.7608586	test: 0.7341728	best: 0.7343122 (333)	total: 36.5s	remaining: 11.2s
350:	learn: 0.7616471	test: 0.7344053	best: 0.7344118 (349)	total: 37.6s	remaining: 10.2s
360:	learn: 0.7626665	test: 0.7347010	best: 0.7347010 (360)	total: 38.7s	remaining: 9.11s
370:	learn: 0.7631017	test: 0.7346903	best: 0.7348047 (365)	total: 39.6s	remaining: 8s
380:	learn: 0.7635070	test: 0.7348160	best: 0.7348167 (378)	total: 40.6s	remaining: 6.93s
390:	learn: 0.7635744	test: 0.7347416	best: 0.7348167 (378)	total: 41.6s	remaining: 5.86s
400:	learn: 0.7643076	test: 0.7346435	best: 0.7348167 (378)	total: 42.7s	remaining: 4.79s
410:	learn: 0.7643972	test: 0.7345240	best: 0.7348167 (378)	total: 43.6s	remaining: 3.71s
420:	learn: 0.7646959	test: 0.7346621	best: 0.7348167 (378)	total: 44.7s	remaining: 2.65s
430:	learn: 0.7655105	test: 0.7348857	best: 0.7348857 (430)	total: 45.9s	remaining: 1.6s
440:	learn: 0.7663582	test: 0.7348145	best: 0.7348857 (430)	total: 47s	remaining: 533ms
445:	learn: 0.7664306	test: 0.7348438	best: 0.7348857 (430)	total: 47.5s	remaining: 0us
bestTest = 0.7348857224
bestIteration = 430
Shrink model to first 431 iterations.
Out[ ]:
<catboost.core.CatBoostClassifier at 0x7f6d5ddfa668>
In [ ]:
# Validation experiment: depth-10 CatBoost classifier on GPU, with AUC tracked
# on the (feats_val, labels_val) eval set while training on feats_train.
cb_init_kwargs = dict(
    task_type='GPU',
    random_state=0,
    iterations=446,
    learning_rate=0.04,
    depth=10,
    eval_metric='AUC',
    boosting_type='Ordered',
    bootstrap_type='Bernoulli',
    subsample=0.8,
    one_hot_max_size=10,
    leaf_estimation_iterations=10,
    max_ctr_complexity=4,
)
estimator_cb = CatBoostClassifier(**cb_init_kwargs)

# Prepare the training frame before the validation frame, in the same order
# the original single call evaluated its arguments.
cb_train_frame = cat_prep(feats_train[good_columns], cat_columns)
cb_eval_pair = (cat_prep(feats_val[good_columns], cat_columns), labels_val)

# Kept as the cell's last expression so the notebook displays the fitted model.
estimator_cb.fit(
    cb_train_frame,
    labels_train,
    cat_features=cat_columns,
    eval_set=cb_eval_pair,
    verbose=10,
    plot=True,
)
0:	learn: 0.5447010	test: 0.5331667	best: 0.5331667 (0)	total: 92.5ms	remaining: 41.2s
10:	learn: 0.5706185	test: 0.5556193	best: 0.5671717 (4)	total: 1.27s	remaining: 50.3s
20:	learn: 0.5697958	test: 0.5757648	best: 0.5757648 (20)	total: 2.67s	remaining: 54.1s
30:	learn: 0.5744148	test: 0.5768476	best: 0.5768476 (30)	total: 4.8s	remaining: 1m 4s
40:	learn: 0.5790444	test: 0.5782141	best: 0.5782141 (40)	total: 6.27s	remaining: 1m 1s
50:	learn: 0.5921847	test: 0.5888987	best: 0.5888987 (50)	total: 8.02s	remaining: 1m 2s
60:	learn: 0.6445189	test: 0.6380194	best: 0.6380194 (60)	total: 10.7s	remaining: 1m 7s
70:	learn: 0.6835760	test: 0.6749506	best: 0.6749506 (70)	total: 14.2s	remaining: 1m 15s
80:	learn: 0.7089992	test: 0.6980618	best: 0.6981046 (79)	total: 17.8s	remaining: 1m 20s
90:	learn: 0.7251012	test: 0.7106932	best: 0.7106932 (90)	total: 22.1s	remaining: 1m 26s
100:	learn: 0.7306885	test: 0.7155408	best: 0.7155408 (100)	total: 25s	remaining: 1m 25s
110:	learn: 0.7335423	test: 0.7180817	best: 0.7180817 (110)	total: 28.5s	remaining: 1m 25s
120:	learn: 0.7367941	test: 0.7200133	best: 0.7201694 (119)	total: 31.7s	remaining: 1m 25s
130:	learn: 0.7403794	test: 0.7225851	best: 0.7225851 (130)	total: 36.6s	remaining: 1m 27s
140:	learn: 0.7416063	test: 0.7238416	best: 0.7239803 (137)	total: 39.5s	remaining: 1m 25s
150:	learn: 0.7438808	test: 0.7246575	best: 0.7246575 (150)	total: 42.1s	remaining: 1m 22s
160:	learn: 0.7468390	test: 0.7268579	best: 0.7268579 (160)	total: 45.4s	remaining: 1m 20s
170:	learn: 0.7498748	test: 0.7286580	best: 0.7286580 (170)	total: 48s	remaining: 1m 17s
180:	learn: 0.7512453	test: 0.7287458	best: 0.7289341 (176)	total: 52.1s	remaining: 1m 16s
190:	learn: 0.7527556	test: 0.7294181	best: 0.7294181 (190)	total: 54.8s	remaining: 1m 13s
200:	learn: 0.7554014	test: 0.7305602	best: 0.7305602 (200)	total: 58.6s	remaining: 1m 11s
210:	learn: 0.7563993	test: 0.7311646	best: 0.7311646 (210)	total: 1m 2s	remaining: 1m 9s
220:	learn: 0.7577929	test: 0.7314076	best: 0.7314076 (220)	total: 1m 5s	remaining: 1m 6s
230:	learn: 0.7589943	test: 0.7315285	best: 0.7315519 (229)	total: 1m 7s	remaining: 1m 3s
240:	learn: 0.7604855	test: 0.7327194	best: 0.7327194 (240)	total: 1m 11s	remaining: 1m
250:	learn: 0.7611980	test: 0.7330785	best: 0.7331047 (244)	total: 1m 13s	remaining: 57s
260:	learn: 0.7620750	test: 0.7332669	best: 0.7333096 (256)	total: 1m 16s	remaining: 54.3s
270:	learn: 0.7628267	test: 0.7337272	best: 0.7337278 (267)	total: 1m 19s	remaining: 51.2s
280:	learn: 0.7630854	test: 0.7340643	best: 0.7340643 (280)	total: 1m 20s	remaining: 47.5s
290:	learn: 0.7637356	test: 0.7343504	best: 0.7343621 (285)	total: 1m 22s	remaining: 44.1s
300:	learn: 0.7644617	test: 0.7346733	best: 0.7346749 (299)	total: 1m 25s	remaining: 41s
310:	learn: 0.7651109	test: 0.7342982	best: 0.7346749 (299)	total: 1m 27s	remaining: 38.1s
320:	learn: 0.7656462	test: 0.7342840	best: 0.7346749 (299)	total: 1m 29s	remaining: 35s
330:	learn: 0.7670267	test: 0.7346148	best: 0.7346749 (299)	total: 1m 33s	remaining: 32.3s
340:	learn: 0.7679095	test: 0.7346841	best: 0.7349898 (335)	total: 1m 34s	remaining: 29.2s
350:	learn: 0.7690600	test: 0.7348184	best: 0.7349898 (335)	total: 1m 36s	remaining: 26.3s
360:	learn: 0.7697250	test: 0.7347533	best: 0.7349898 (335)	total: 1m 39s	remaining: 23.4s
370:	learn: 0.7700650	test: 0.7348813	best: 0.7349898 (335)	total: 1m 41s	remaining: 20.6s
380:	learn: 0.7703701	test: 0.7348338	best: 0.7349898 (335)	total: 1m 43s	remaining: 17.7s
390:	learn: 0.7707838	test: 0.7349834	best: 0.7351052 (386)	total: 1m 45s	remaining: 14.9s
400:	learn: 0.7715112	test: 0.7349209	best: 0.7351052 (386)	total: 1m 48s	remaining: 12.2s
410:	learn: 0.7720014	test: 0.7347062	best: 0.7351052 (386)	total: 1m 50s	remaining: 9.43s
420:	learn: 0.7724572	test: 0.7346541	best: 0.7351052 (386)	total: 1m 52s	remaining: 6.71s
430:	learn: 0.7732050	test: 0.7347062	best: 0.7351052 (386)	total: 1m 55s	remaining: 4.02s
440:	learn: 0.7735919	test: 0.7344796	best: 0.7351052 (386)	total: 1m 57s	remaining: 1.33s
445:	learn: 0.7736749	test: 0.7344522	best: 0.7351052 (386)	total: 1m 58s	remaining: 0us
bestTest = 0.7351051569
bestIteration = 386
Shrink model to first 387 iterations.
Out[ ]:
<catboost.core.CatBoostClassifier at 0x7f6d373e4780>
In [ ]:
# Final fit of the depth-10 configuration on the full labelled data
# (features / labels, no eval_set), followed by scoring of test_data.
# NOTE(review): iterations=400 is presumably derived from bestIteration = 386
# reported by the validation run above -- confirm.
estimator_cb = CatBoostClassifier(
    task_type='GPU',
    random_state=0,
    iterations=400,
    depth=10,
    learning_rate=0.04,
    eval_metric='AUC',
    boosting_type='Ordered',
    bootstrap_type='Bernoulli',
    subsample=0.8,
    one_hot_max_size=10,
    leaf_estimation_iterations=10,
    max_ctr_complexity=4,
)

cb_full_frame = cat_prep(features[good_columns], cat_columns)
estimator_cb.fit(
    cb_full_frame,
    labels,
    cat_features=cat_columns,
    verbose=10,
    plot=True,
)

cb_test_frame = cat_prep(test_data[good_columns], cat_columns)
probs = estimator_cb.predict_proba(cb_test_frame)

# Only the second probability column is submitted
# (presumably the positive/churn class -- confirm the label encoding).
write_to_submission_file(probs[:, 1], out_file='submission_cb5.csv')
0:	learn: 0.5667429	total: 92.2ms	remaining: 36.8s
10:	learn: 0.5734318	total: 1.94s	remaining: 1m 8s
20:	learn: 0.5742779	total: 3.47s	remaining: 1m 2s
30:	learn: 0.5801413	total: 5.71s	remaining: 1m 8s
40:	learn: 0.5850573	total: 6.8s	remaining: 59.5s
50:	learn: 0.5888993	total: 9.06s	remaining: 1m 1s
60:	learn: 0.6495255	total: 11.2s	remaining: 1m 2s
70:	learn: 0.6734518	total: 13.1s	remaining: 1m
80:	learn: 0.6966035	total: 16.6s	remaining: 1m 5s
90:	learn: 0.7040054	total: 19.7s	remaining: 1m 6s
100:	learn: 0.7144088	total: 23.3s	remaining: 1m 8s
110:	learn: 0.7202201	total: 25.6s	remaining: 1m 6s
120:	learn: 0.7253250	total: 28.1s	remaining: 1m 4s
130:	learn: 0.7300747	total: 31.1s	remaining: 1m 3s
140:	learn: 0.7323713	total: 34s	remaining: 1m 2s
150:	learn: 0.7352472	total: 36.4s	remaining: 1m
160:	learn: 0.7395209	total: 38.6s	remaining: 57.3s
170:	learn: 0.7423800	total: 42.3s	remaining: 56.7s
180:	learn: 0.7437104	total: 45s	remaining: 54.4s
190:	learn: 0.7452051	total: 48.1s	remaining: 52.7s
200:	learn: 0.7469370	total: 51.3s	remaining: 50.8s
210:	learn: 0.7480311	total: 54.3s	remaining: 48.6s
220:	learn: 0.7488669	total: 57.8s	remaining: 46.8s
230:	learn: 0.7499840	total: 1m	remaining: 44.4s
240:	learn: 0.7513503	total: 1m 4s	remaining: 42.7s
250:	learn: 0.7528935	total: 1m 7s	remaining: 40.2s
260:	learn: 0.7540420	total: 1m 10s	remaining: 37.3s
270:	learn: 0.7549557	total: 1m 13s	remaining: 35s
280:	learn: 0.7560596	total: 1m 16s	remaining: 32.2s
290:	learn: 0.7570670	total: 1m 18s	remaining: 29.3s
300:	learn: 0.7573483	total: 1m 19s	remaining: 26.1s
310:	learn: 0.7580114	total: 1m 21s	remaining: 23.4s
320:	learn: 0.7582013	total: 1m 23s	remaining: 20.5s
330:	learn: 0.7587690	total: 1m 25s	remaining: 17.9s
340:	learn: 0.7591087	total: 1m 27s	remaining: 15.2s
350:	learn: 0.7594655	total: 1m 29s	remaining: 12.5s
360:	learn: 0.7601091	total: 1m 31s	remaining: 9.93s
370:	learn: 0.7603166	total: 1m 33s	remaining: 7.27s
380:	learn: 0.7612735	total: 1m 35s	remaining: 4.78s
390:	learn: 0.7617249	total: 1m 38s	remaining: 2.27s
399:	learn: 0.7621951	total: 1m 40s	remaining: 0us
In [ ]:
# Validation experiment: depth-7 model with auto_class_weights='Balanced',
# evaluated by AUC on the (feats_val, labels_val) eval set.
cb_balanced_kwargs = dict(
    task_type='GPU',
    random_state=0,
    iterations=446,
    learning_rate=0.04,
    depth=7,
    auto_class_weights='Balanced',
    eval_metric='AUC',
    boosting_type='Ordered',
    bootstrap_type='Bernoulli',
    subsample=0.8,
    one_hot_max_size=10,
    leaf_estimation_iterations=10,
    max_ctr_complexity=4,
)
estimator_cb = CatBoostClassifier(**cb_balanced_kwargs)

# Training frame is prepared first, then the validation frame.
cb_train_frame = cat_prep(feats_train[good_columns], cat_columns)
cb_eval_pair = (cat_prep(feats_val[good_columns], cat_columns), labels_val)

# Last expression of the cell: the notebook echoes the fitted estimator.
estimator_cb.fit(
    cb_train_frame,
    labels_train,
    cat_features=cat_columns,
    eval_set=cb_eval_pair,
    verbose=10,
    plot=True,
)
0:	learn: 0.6007290	test: 0.5852576	best: 0.5852576 (0)	total: 220ms	remaining: 1m 37s
10:	learn: 0.7103574	test: 0.6983496	best: 0.6983496 (10)	total: 1.37s	remaining: 54.2s
20:	learn: 0.7174943	test: 0.7081704	best: 0.7081704 (20)	total: 2.44s	remaining: 49.5s
30:	learn: 0.7247680	test: 0.7146030	best: 0.7146030 (30)	total: 3.43s	remaining: 45.9s
40:	learn: 0.7292818	test: 0.7185826	best: 0.7187523 (39)	total: 4.46s	remaining: 44.1s
50:	learn: 0.7329651	test: 0.7211073	best: 0.7211073 (50)	total: 5.49s	remaining: 42.6s
60:	learn: 0.7357433	test: 0.7215050	best: 0.7215050 (60)	total: 6.49s	remaining: 40.9s
70:	learn: 0.7399853	test: 0.7242538	best: 0.7242538 (70)	total: 7.53s	remaining: 39.8s
80:	learn: 0.7411895	test: 0.7249576	best: 0.7251056 (79)	total: 8.4s	remaining: 37.8s
90:	learn: 0.7428066	test: 0.7255858	best: 0.7255858 (90)	total: 9.46s	remaining: 36.9s
100:	learn: 0.7434371	test: 0.7259392	best: 0.7259687 (93)	total: 10.5s	remaining: 35.8s
110:	learn: 0.7446579	test: 0.7265469	best: 0.7265469 (110)	total: 11.5s	remaining: 34.8s
120:	learn: 0.7453163	test: 0.7267354	best: 0.7267554 (119)	total: 12.5s	remaining: 33.5s
130:	learn: 0.7456276	test: 0.7268607	best: 0.7268607 (128)	total: 13.5s	remaining: 32.4s
140:	learn: 0.7461345	test: 0.7268834	best: 0.7269580 (138)	total: 14.4s	remaining: 31.2s
150:	learn: 0.7465544	test: 0.7268983	best: 0.7270197 (149)	total: 15.4s	remaining: 30.1s
160:	learn: 0.7472373	test: 0.7271132	best: 0.7271132 (159)	total: 16.4s	remaining: 29s
170:	learn: 0.7473718	test: 0.7272740	best: 0.7272740 (167)	total: 17.4s	remaining: 27.9s
180:	learn: 0.7477958	test: 0.7277795	best: 0.7277795 (179)	total: 18.5s	remaining: 27s
190:	learn: 0.7477961	test: 0.7277499	best: 0.7277795 (179)	total: 19.4s	remaining: 25.9s
200:	learn: 0.7477169	test: 0.7275414	best: 0.7277795 (179)	total: 20.4s	remaining: 24.8s
210:	learn: 0.7486347	test: 0.7279154	best: 0.7279154 (205)	total: 21.3s	remaining: 23.7s
220:	learn: 0.7488810	test: 0.7279184	best: 0.7280842 (214)	total: 22.3s	remaining: 22.7s
230:	learn: 0.7490456	test: 0.7280232	best: 0.7280842 (214)	total: 23.2s	remaining: 21.6s
240:	learn: 0.7490461	test: 0.7280130	best: 0.7280842 (214)	total: 24.2s	remaining: 20.5s
250:	learn: 0.7491663	test: 0.7280142	best: 0.7280842 (214)	total: 25s	remaining: 19.4s
260:	learn: 0.7491663	test: 0.7280142	best: 0.7280842 (214)	total: 25.8s	remaining: 18.3s
270:	learn: 0.7491663	test: 0.7280142	best: 0.7280842 (214)	total: 26.8s	remaining: 17.3s
280:	learn: 0.7491673	test: 0.7280155	best: 0.7280842 (214)	total: 27.9s	remaining: 16.4s
290:	learn: 0.7491674	test: 0.7279914	best: 0.7280842 (214)	total: 29.2s	remaining: 15.6s
300:	learn: 0.7491674	test: 0.7279914	best: 0.7280842 (214)	total: 30.1s	remaining: 14.5s
310:	learn: 0.7493172	test: 0.7281517	best: 0.7281517 (303)	total: 31.1s	remaining: 13.5s
320:	learn: 0.7493620	test: 0.7281707	best: 0.7281707 (319)	total: 32.1s	remaining: 12.5s
330:	learn: 0.7494184	test: 0.7280944	best: 0.7282657 (322)	total: 33.1s	remaining: 11.5s
340:	learn: 0.7494045	test: 0.7278411	best: 0.7282657 (322)	total: 34.4s	remaining: 10.6s
350:	learn: 0.7495476	test: 0.7279474	best: 0.7282657 (322)	total: 35.3s	remaining: 9.56s
360:	learn: 0.7500990	test: 0.7278443	best: 0.7282657 (322)	total: 36.2s	remaining: 8.53s
370:	learn: 0.7502477	test: 0.7274912	best: 0.7282657 (322)	total: 37.1s	remaining: 7.5s
380:	learn: 0.7504004	test: 0.7276026	best: 0.7282657 (322)	total: 38s	remaining: 6.48s
390:	learn: 0.7506172	test: 0.7276774	best: 0.7282657 (322)	total: 38.9s	remaining: 5.47s
400:	learn: 0.7505972	test: 0.7275583	best: 0.7282657 (322)	total: 39.9s	remaining: 4.47s
410:	learn: 0.7506435	test: 0.7273575	best: 0.7282657 (322)	total: 40.7s	remaining: 3.46s
420:	learn: 0.7510425	test: 0.7273640	best: 0.7282657 (322)	total: 41.6s	remaining: 2.47s
430:	learn: 0.7512019	test: 0.7272752	best: 0.7282657 (322)	total: 42.4s	remaining: 1.48s
440:	learn: 0.7511749	test: 0.7271791	best: 0.7282657 (322)	total: 43.2s	remaining: 489ms
445:	learn: 0.7511750	test: 0.7271357	best: 0.7282657 (322)	total: 43.6s	remaining: 0us
bestTest = 0.7282656729
bestIteration = 322
Shrink model to first 323 iterations.
Out[ ]:
<catboost.core.CatBoostClassifier at 0x7f6d5e112400>
In [ ]:
# Validation experiment: same depth-7 setup as the 'Balanced' run, but with
# auto_class_weights='SqrtBalanced'; AUC is tracked on the validation eval set.
cb_sqrt_balanced_kwargs = dict(
    task_type='GPU',
    random_state=0,
    iterations=446,
    learning_rate=0.04,
    depth=7,
    auto_class_weights='SqrtBalanced',
    eval_metric='AUC',
    boosting_type='Ordered',
    bootstrap_type='Bernoulli',
    subsample=0.8,
    one_hot_max_size=10,
    leaf_estimation_iterations=10,
    max_ctr_complexity=4,
)
estimator_cb = CatBoostClassifier(**cb_sqrt_balanced_kwargs)

# Training frame is prepared first, then the validation frame.
cb_train_frame = cat_prep(feats_train[good_columns], cat_columns)
cb_eval_pair = (cat_prep(feats_val[good_columns], cat_columns), labels_val)

# Last expression of the cell: the notebook echoes the fitted estimator.
estimator_cb.fit(
    cb_train_frame,
    labels_train,
    cat_features=cat_columns,
    eval_set=cb_eval_pair,
    verbose=10,
    plot=True,
)
0:	learn: 0.5447010	test: 0.5331667	best: 0.5331667 (0)	total: 155ms	remaining: 1m 8s
10:	learn: 0.5725548	test: 0.5789314	best: 0.5793024 (6)	total: 1.14s	remaining: 45.1s
20:	learn: 0.5818743	test: 0.5860016	best: 0.5862949 (19)	total: 2.02s	remaining: 41s
30:	learn: 0.5890361	test: 0.5899971	best: 0.5905760 (28)	total: 2.97s	remaining: 39.7s
40:	learn: 0.6471462	test: 0.6413738	best: 0.6413738 (40)	total: 3.92s	remaining: 38.8s
50:	learn: 0.6854489	test: 0.6788197	best: 0.6788197 (50)	total: 4.9s	remaining: 37.9s
60:	learn: 0.7048196	test: 0.6968263	best: 0.6968263 (60)	total: 5.88s	remaining: 37.1s
70:	learn: 0.7193882	test: 0.7082835	best: 0.7082835 (70)	total: 6.95s	remaining: 36.7s
80:	learn: 0.7227406	test: 0.7109630	best: 0.7109630 (80)	total: 7.73s	remaining: 34.8s
90:	learn: 0.7269660	test: 0.7135507	best: 0.7135507 (90)	total: 8.87s	remaining: 34.6s
100:	learn: 0.7327666	test: 0.7177502	best: 0.7177502 (100)	total: 9.98s	remaining: 34.1s
110:	learn: 0.7358003	test: 0.7189570	best: 0.7190117 (109)	total: 11.1s	remaining: 33.4s
120:	learn: 0.7387179	test: 0.7210168	best: 0.7210168 (120)	total: 12s	remaining: 32.3s
130:	learn: 0.7415228	test: 0.7236373	best: 0.7236373 (130)	total: 13.1s	remaining: 31.5s
140:	learn: 0.7431317	test: 0.7249310	best: 0.7249810 (138)	total: 14.1s	remaining: 30.6s
150:	learn: 0.7451515	test: 0.7258693	best: 0.7258693 (150)	total: 15.1s	remaining: 29.5s
160:	learn: 0.7462865	test: 0.7266356	best: 0.7266356 (160)	total: 16.1s	remaining: 28.5s
170:	learn: 0.7477106	test: 0.7276118	best: 0.7276118 (170)	total: 17.1s	remaining: 27.4s
180:	learn: 0.7488785	test: 0.7276118	best: 0.7276118 (180)	total: 18.1s	remaining: 26.6s
190:	learn: 0.7504255	test: 0.7276955	best: 0.7277310 (185)	total: 19.2s	remaining: 25.6s
200:	learn: 0.7510377	test: 0.7279502	best: 0.7279502 (200)	total: 20.2s	remaining: 24.6s
210:	learn: 0.7511797	test: 0.7278928	best: 0.7279523 (205)	total: 21.1s	remaining: 23.5s
220:	learn: 0.7518546	test: 0.7277753	best: 0.7279523 (205)	total: 22.2s	remaining: 22.6s
230:	learn: 0.7529930	test: 0.7286428	best: 0.7286428 (230)	total: 23.3s	remaining: 21.7s
240:	learn: 0.7538623	test: 0.7287961	best: 0.7288150 (239)	total: 24.4s	remaining: 20.7s
250:	learn: 0.7542380	test: 0.7290635	best: 0.7291300 (247)	total: 25.2s	remaining: 19.6s
260:	learn: 0.7544603	test: 0.7291635	best: 0.7291857 (255)	total: 26.1s	remaining: 18.5s
270:	learn: 0.7548414	test: 0.7293718	best: 0.7294235 (267)	total: 27s	remaining: 17.4s
280:	learn: 0.7550093	test: 0.7292848	best: 0.7294235 (267)	total: 28.1s	remaining: 16.5s
290:	learn: 0.7552896	test: 0.7294592	best: 0.7295032 (282)	total: 29.1s	remaining: 15.5s
300:	learn: 0.7555245	test: 0.7295930	best: 0.7295930 (300)	total: 30s	remaining: 14.4s
310:	learn: 0.7561927	test: 0.7295137	best: 0.7295930 (300)	total: 31.1s	remaining: 13.5s
320:	learn: 0.7562955	test: 0.7292652	best: 0.7295930 (300)	total: 32s	remaining: 12.5s
330:	learn: 0.7564525	test: 0.7293108	best: 0.7295930 (300)	total: 32.9s	remaining: 11.4s
340:	learn: 0.7566585	test: 0.7292185	best: 0.7295930 (300)	total: 33.8s	remaining: 10.4s
350:	learn: 0.7569500	test: 0.7295166	best: 0.7295930 (300)	total: 34.8s	remaining: 9.41s
360:	learn: 0.7575614	test: 0.7297857	best: 0.7297857 (360)	total: 35.8s	remaining: 8.42s
370:	learn: 0.7576080	test: 0.7296817	best: 0.7297857 (360)	total: 36.6s	remaining: 7.41s
380:	learn: 0.7576402	test: 0.7296999	best: 0.7297857 (360)	total: 37.6s	remaining: 6.42s
390:	learn: 0.7577057	test: 0.7296872	best: 0.7297857 (360)	total: 38.5s	remaining: 5.41s
400:	learn: 0.7579204	test: 0.7297158	best: 0.7297857 (360)	total: 39.5s	remaining: 4.44s
410:	learn: 0.7583924	test: 0.7303177	best: 0.7303177 (410)	total: 40.5s	remaining: 3.45s
420:	learn: 0.7588491	test: 0.7301729	best: 0.7303177 (410)	total: 41.4s	remaining: 2.46s
430:	learn: 0.7593011	test: 0.7301892	best: 0.7303177 (410)	total: 42.4s	remaining: 1.48s
440:	learn: 0.7593011	test: 0.7301788	best: 0.7303177 (410)	total: 43.1s	remaining: 489ms
445:	learn: 0.7593012	test: 0.7301579	best: 0.7303177 (410)	total: 43.6s	remaining: 0us
bestTest = 0.7303176522
bestIteration = 410
Shrink model to first 411 iterations.
Out[ ]:
<catboost.core.CatBoostClassifier at 0x7f6d37113668>
In [ ]:
# Final fit of the 'SqrtBalanced' depth-7 configuration on the full labelled
# data (features / labels, no eval_set), followed by scoring of test_data.
# NOTE(review): learning_rate is 0.06 here, while the validated 'SqrtBalanced'
# run above used 0.04 with the same iteration budget -- confirm this is intended.
estimator_cb = CatBoostClassifier(
    task_type='GPU',
    random_state=0,
    iterations=446,
    depth=7,
    learning_rate=0.06,
    auto_class_weights='SqrtBalanced',
    eval_metric='AUC',
    boosting_type='Ordered',
    bootstrap_type='Bernoulli',
    subsample=0.8,
    one_hot_max_size=10,
    leaf_estimation_iterations=10,
    max_ctr_complexity=4,
)

cb_full_frame = cat_prep(features[good_columns], cat_columns)
estimator_cb.fit(
    cb_full_frame,
    labels,
    cat_features=cat_columns,
    verbose=10,
    plot=True,
)

cb_test_frame = cat_prep(test_data[good_columns], cat_columns)
probs = estimator_cb.predict_proba(cb_test_frame)

# Only the second probability column is submitted
# (presumably the positive/churn class -- confirm the label encoding).
write_to_submission_file(probs[:, 1], out_file='submission_cb6.csv')
0:	learn: 0.5775378	total: 107ms	remaining: 47.6s
10:	learn: 0.5792089	total: 1.14s	remaining: 45.2s
20:	learn: 0.5915175	total: 2.23s	remaining: 45.2s
30:	learn: 0.6646564	total: 3.14s	remaining: 42.1s
40:	learn: 0.6993670	total: 4.12s	remaining: 40.7s
50:	learn: 0.7130793	total: 5.2s	remaining: 40.3s
60:	learn: 0.7231656	total: 6.34s	remaining: 40s
70:	learn: 0.7303654	total: 7.33s	remaining: 38.7s
80:	learn: 0.7357491	total: 8.29s	remaining: 37.4s
90:	learn: 0.7399889	total: 9.45s	remaining: 36.9s
100:	learn: 0.7420878	total: 10.4s	remaining: 35.6s
110:	learn: 0.7444582	total: 11.4s	remaining: 34.4s
120:	learn: 0.7465216	total: 12.5s	remaining: 33.6s
130:	learn: 0.7483014	total: 13.6s	remaining: 32.8s
140:	learn: 0.7489046	total: 14.7s	remaining: 31.7s
150:	learn: 0.7498923	total: 15.7s	remaining: 30.6s
160:	learn: 0.7506512	total: 16.7s	remaining: 29.5s
170:	learn: 0.7518513	total: 17.7s	remaining: 28.5s
180:	learn: 0.7526207	total: 18.6s	remaining: 27.2s
190:	learn: 0.7527296	total: 19.6s	remaining: 26.2s
200:	learn: 0.7528422	total: 20.6s	remaining: 25.1s
210:	learn: 0.7528325	total: 21.6s	remaining: 24s
220:	learn: 0.7530717	total: 22.5s	remaining: 23s
230:	learn: 0.7536483	total: 23.5s	remaining: 21.8s
240:	learn: 0.7538736	total: 24.4s	remaining: 20.7s
250:	learn: 0.7542146	total: 25.3s	remaining: 19.7s
260:	learn: 0.7546097	total: 26.2s	remaining: 18.6s
270:	learn: 0.7546652	total: 27s	remaining: 17.5s
280:	learn: 0.7547182	total: 27.8s	remaining: 16.3s
290:	learn: 0.7548921	total: 28.7s	remaining: 15.3s
300:	learn: 0.7549771	total: 29.5s	remaining: 14.2s
310:	learn: 0.7550736	total: 30.3s	remaining: 13.2s
320:	learn: 0.7554561	total: 31.2s	remaining: 12.1s
330:	learn: 0.7556438	total: 32s	remaining: 11.1s
340:	learn: 0.7560321	total: 33s	remaining: 10.2s
350:	learn: 0.7562352	total: 33.8s	remaining: 9.14s
360:	learn: 0.7566471	total: 34.7s	remaining: 8.16s
370:	learn: 0.7566159	total: 35.3s	remaining: 7.14s
380:	learn: 0.7567930	total: 36s	remaining: 6.15s
390:	learn: 0.7572623	total: 36.8s	remaining: 5.17s
400:	learn: 0.7572446	total: 37.6s	remaining: 4.21s
410:	learn: 0.7572489	total: 38.3s	remaining: 3.26s
420:	learn: 0.7573028	total: 38.9s	remaining: 2.31s
430:	learn: 0.7578370	total: 39.7s	remaining: 1.38s
440:	learn: 0.7578940	total: 40.4s	remaining: 458ms
445:	learn: 0.7578940	total: 40.7s	remaining: 0us
In [ ]:
# Wrap the prepared training split in a CatBoost Pool; it is passed to the
# data-dependent 'LossFunctionChange' importance computed further below.
train_pool = Pool(
    data=cat_prep(feats_train[good_columns], cat_columns),
    label=labels_train,
    cat_features=cat_columns,
)
In [ ]:
# Feature importance of the current estimator_cb using CatBoost's default
# importance type (no `type` argument is passed), as (feature, score) rows.
cb_default_importance = estimator_cb.get_feature_importance(prettified=True)
# Displayed as the cell result.
np.array(cb_default_importance)
Out[ ]:
array([['Var126', 47.530504467969386],
       ['Var217', 10.307294571038822],
       ['Var199', 5.557447795485032],
       ['Var73', 4.561552195885621],
       ['Var202', 3.923420518352015],
       ['Var189', 3.6619198990014374],
       ['Var81', 3.2504747506335407],
       ['Var113', 3.121904815347451],
       ['Var74', 2.58657178757639],
       ['Var206', 2.5194717010854264],
       ['Var7', 1.9520657501141139],
       ['Var205', 1.9346772124471632],
       ['Var192', 1.4923331173833794],
       ['Var212', 1.2267756887671426],
       ['Var193', 0.9522770799252476],
       ['Var228', 0.7798236130054602],
       ['Var197', 0.7722518135341467],
       ['Var216', 0.6653547074413615],
       ['Var229', 0.40226857454124365],
       ['Var210', 0.3136759715762227],
       ['Var227', 0.2823840627090325],
       ['Var153', 0.2636498198344613],
       ['Var221', 0.234910294787656],
       ['Var22', 0.19751909181550611],
       ['Var207', 0.19052139035107957],
       ['Var65', 0.16682622061319483],
       ['Var218', 0.13313526623443644],
       ['Var219', 0.13252986766496833],
       ['Var35', 0.13185729097445484],
       ['Var204', 0.12855117003034464],
       ['Var140', 0.11048354572041744],
       ['Var195', 0.09675136790554346],
       ['Var226', 0.090161590824722],
       ['Var163', 0.06328114562529794],
       ['Var78', 0.05750239071573049],
       ['Var28', 0.05642631409211466],
       ['Var198', 0.03630315965900901],
       ['Var132', 0.03479357783264711],
       ['Var38', 0.033961685168112994],
       ['Var222', 0.012654526426521127],
       ['Var24', 0.00858502615477369],
       ['Var83', 0.00841634221815633],
       ['Var220', 0.007922114609140052],
       ['Var143', 0.007918810963134642],
       ['Var123', 0.0007957568872636496],
       ['Var85', 9.213907167204136e-05],
       ['Var6', 0.0],
       ['Var13', 0.0],
       ['Var21', 0.0],
       ['Var25', 0.0],
       ['Var57', 0.0],
       ['Var76', 0.0],
       ['Var94', 0.0],
       ['Var109', 0.0],
       ['Var112', 0.0],
       ['Var119', 0.0],
       ['Var125', 0.0],
       ['Var133', 0.0],
       ['Var134', 0.0],
       ['Var149', 0.0],
       ['Var160', 0.0],
       ['Var44', 0.0],
       ['Var72', 0.0],
       ['Var144', 0.0],
       ['Var173', 0.0],
       ['Var181', 0.0],
       ['Var196', 0.0],
       ['Var203', 0.0],
       ['Var208', 0.0],
       ['Var211', 0.0],
       ['Var223', 0.0],
       ['Var225', 0.0]], dtype=object)
In [ ]:
# 'LossFunctionChange' importance of the current estimator_cb, computed on
# train_pool; unlike the default importance it needs a dataset to evaluate on.
cb_loss_change_importance = estimator_cb.get_feature_importance(
    data=train_pool,
    type='LossFunctionChange',
    prettified=True,
)
# Displayed as the cell result.
np.array(cb_loss_change_importance)
Out[ ]:
array([['Var126', 0.02744540897295049],
       ['Var81', 0.002384751623316861],
       ['Var113', 0.0023596435340573763],
       ['Var189', 0.001938884958334281],
       ['Var205', 0.00173296711814791],
       ['Var73', 0.0017154149975616485],
       ['Var65', 0.0005392523596149612],
       ['Var218', 0.00036017676911848096],
       ['Var210', 0.000278318955126422],
       ['Var227', 0.00024352358423262999],
       ['Var229', 0.00023956504157052194],
       ['Var132', 0.0002166787858081387],
       ['Var22', 0.00017967528131224153],
       ['Var221', 0.0001343242280869017],
       ['Var163', 8.318380877647691e-05],
       ['Var38', 1.2323626652488429e-05],
       ['Var24', 4.40187845018869e-06],
       ['Var83', 3.894085077504883e-06],
       ['Var143', 1.0266975488115726e-06],
       ['Var123', 9.415905340981467e-07],
       ['Var85', 2.4033252654653836e-07],
       ['Var6', 0.0],
       ['Var13', 0.0],
       ['Var21', 0.0],
       ['Var25', 0.0],
       ['Var57', 0.0],
       ['Var76', 0.0],
       ['Var94', 0.0],
       ['Var109', 0.0],
       ['Var112', 0.0],
       ['Var119', 0.0],
       ['Var125', 0.0],
       ['Var133', 0.0],
       ['Var134', 0.0],
       ['Var149', 0.0],
       ['Var160', 0.0],
       ['Var44', 0.0],
       ['Var72', 0.0],
       ['Var144', 0.0],
       ['Var173', 0.0],
       ['Var181', 0.0],
       ['Var196', 0.0],
       ['Var203', 0.0],
       ['Var208', 0.0],
       ['Var211', 0.0],
       ['Var223', 0.0],
       ['Var225', 0.0],
       ['Var28', -6.965811514136888e-05],
       ['Var219', -0.00010439331665365592],
       ['Var74', -0.0001371006555639931],
       ['Var140', -0.0002504770156146252],
       ['Var198', -0.000739944844638916],
       ['Var7', -0.0009018997935607009],
       ['Var207', -0.001088666544355658],
       ['Var222', -0.0012953673362272784],
       ['Var153', -0.0013092306460398273],
       ['Var195', -0.0013957668399046663],
       ['Var193', -0.0015242942600751097],
       ['Var226', -0.002321769328248522],
       ['Var204', -0.0030359212081777684],
       ['Var220', -0.00355933853094891],
       ['Var35', -0.006029704396713649],
       ['Var216', -0.006207046483721834],
       ['Var228', -0.006419909549464078],
       ['Var197', -0.006991798239224252],
       ['Var78', -0.007135721906931574],
       ['Var192', -0.007730487247634216],
       ['Var212', -0.016264963357447888],
       ['Var202', -0.020480594437044242],
       ['Var206', -0.03129629166740353],
       ['Var217', -0.041989033622146656],
       ['Var199', -0.057524405564179246]], dtype=object)
In [ ]:
# Reduced categorical column list: a copy of cat_columns without the five
# features that have the most negative 'LossFunctionChange' scores in the
# output above. cat_columns itself is left untouched.
excluded_cat_features = ('Var212', 'Var202', 'Var206', 'Var217', 'Var199')

cat_columns2 = cat_columns.copy()
for var in excluded_cat_features:
    # .remove() raises ValueError when the name is absent, so a misspelled
    # feature name fails loudly instead of being silently ignored.
    cat_columns2.remove(var)

# Displayed as the cell result: (reduced count, original count).
(len(cat_columns2), len(cat_columns))
Out[ ]:
(36, 41)
In [ ]:
# Feature list for the reduced model: num_columns (presumably the numeric
# feature names -- defined outside this section) followed by the reduced
# categorical list cat_columns2.
good_columns2 = num_columns + cat_columns2
In [ ]:
# Validation experiment on the reduced feature set (good_columns2 /
# cat_columns2). No depth or auto_class_weights is passed here, so CatBoost's
# defaults apply for those; AUC is tracked on the validation eval set.
cb_reduced_kwargs = dict(
    task_type='GPU',
    random_state=0,
    iterations=380,
    learning_rate=0.08,
    eval_metric='AUC',
    boosting_type='Ordered',
    bootstrap_type='Bernoulli',
    subsample=0.8,
    one_hot_max_size=10,
    leaf_estimation_iterations=10,
    max_ctr_complexity=4,
)
estimator_cb = CatBoostClassifier(**cb_reduced_kwargs)

# Training frame is prepared first, then the validation frame.
cb_train_frame = cat_prep(feats_train[good_columns2], cat_columns2)
cb_eval_pair = (cat_prep(feats_val[good_columns2], cat_columns2), labels_val)

# Last expression of the cell: the notebook echoes the fitted estimator.
estimator_cb.fit(
    cb_train_frame,
    labels_train,
    cat_features=cat_columns2,
    eval_set=cb_eval_pair,
    verbose=10,
    plot=True,
)
0:	learn: 0.5153817	test: 0.5109755	best: 0.5109755 (0)	total: 87.7ms	remaining: 33.2s
10:	learn: 0.5562226	test: 0.5601440	best: 0.5601440 (10)	total: 806ms	remaining: 27s
20:	learn: 0.5577632	test: 0.5623659	best: 0.5624653 (19)	total: 1.51s	remaining: 25.8s
30:	learn: 0.6542436	test: 0.6420541	best: 0.6420541 (30)	total: 2.29s	remaining: 25.8s
40:	learn: 0.7024047	test: 0.6920700	best: 0.6920700 (40)	total: 3.06s	remaining: 25.3s
50:	learn: 0.7169058	test: 0.7050055	best: 0.7050055 (50)	total: 3.91s	remaining: 25.2s
60:	learn: 0.7241767	test: 0.7119759	best: 0.7119759 (60)	total: 4.72s	remaining: 24.7s
70:	learn: 0.7284560	test: 0.7148308	best: 0.7148308 (70)	total: 5.53s	remaining: 24s
80:	learn: 0.7359103	test: 0.7206493	best: 0.7206493 (80)	total: 6.3s	remaining: 23.3s
90:	learn: 0.7409043	test: 0.7229093	best: 0.7229093 (90)	total: 7.12s	remaining: 22.6s
100:	learn: 0.7437638	test: 0.7251222	best: 0.7251824 (98)	total: 7.96s	remaining: 22s
110:	learn: 0.7470413	test: 0.7262711	best: 0.7262711 (110)	total: 8.8s	remaining: 21.3s
120:	learn: 0.7494208	test: 0.7278729	best: 0.7278729 (120)	total: 9.64s	remaining: 20.6s
130:	learn: 0.7509277	test: 0.7289948	best: 0.7289948 (130)	total: 10.4s	remaining: 19.8s
140:	learn: 0.7522581	test: 0.7296814	best: 0.7297333 (139)	total: 11.2s	remaining: 18.9s
150:	learn: 0.7554007	test: 0.7313807	best: 0.7314068 (148)	total: 12s	remaining: 18.2s
160:	learn: 0.7569460	test: 0.7315562	best: 0.7317228 (158)	total: 12.7s	remaining: 17.3s
170:	learn: 0.7591934	test: 0.7330706	best: 0.7330915 (169)	total: 13.5s	remaining: 16.6s
180:	learn: 0.7606207	test: 0.7339893	best: 0.7340603 (179)	total: 14.4s	remaining: 15.8s
190:	learn: 0.7621164	test: 0.7337932	best: 0.7341282 (185)	total: 15.2s	remaining: 15s
200:	learn: 0.7637785	test: 0.7343167	best: 0.7343167 (200)	total: 16s	remaining: 14.2s
210:	learn: 0.7661651	test: 0.7356572	best: 0.7356572 (210)	total: 16.7s	remaining: 13.4s
220:	learn: 0.7678286	test: 0.7356812	best: 0.7359964 (219)	total: 17.5s	remaining: 12.6s
230:	learn: 0.7694594	test: 0.7347978	best: 0.7359964 (219)	total: 18.3s	remaining: 11.8s
240:	learn: 0.7710626	test: 0.7347527	best: 0.7359964 (219)	total: 19.1s	remaining: 11s
250:	learn: 0.7723109	test: 0.7351093	best: 0.7359964 (219)	total: 19.9s	remaining: 10.3s
260:	learn: 0.7739441	test: 0.7356653	best: 0.7359964 (219)	total: 20.7s	remaining: 9.45s
270:	learn: 0.7755677	test: 0.7361345	best: 0.7361345 (270)	total: 21.5s	remaining: 8.66s
280:	learn: 0.7764483	test: 0.7362171	best: 0.7364561 (274)	total: 22.3s	remaining: 7.87s
290:	learn: 0.7772520	test: 0.7363337	best: 0.7364561 (274)	total: 23.1s	remaining: 7.06s
300:	learn: 0.7777527	test: 0.7358929	best: 0.7364561 (274)	total: 23.8s	remaining: 6.25s
310:	learn: 0.7784177	test: 0.7360542	best: 0.7364561 (274)	total: 24.5s	remaining: 5.45s
320:	learn: 0.7791612	test: 0.7359775	best: 0.7364561 (274)	total: 25.3s	remaining: 4.65s
330:	learn: 0.7810394	test: 0.7365184	best: 0.7366984 (325)	total: 26.1s	remaining: 3.86s
340:	learn: 0.7821136	test: 0.7365140	best: 0.7366984 (325)	total: 26.8s	remaining: 3.07s
350:	learn: 0.7834298	test: 0.7364078	best: 0.7366984 (325)	total: 27.6s	remaining: 2.28s
360:	learn: 0.7838520	test: 0.7366120	best: 0.7366984 (325)	total: 28.3s	remaining: 1.49s
370:	learn: 0.7849566	test: 0.7365896	best: 0.7368077 (362)	total: 29.1s	remaining: 705ms
379:	learn: 0.7859668	test: 0.7369667	best: 0.7369723 (378)	total: 29.7s	remaining: 0us
bestTest = 0.7369722724
bestIteration = 378
Shrink model to first 379 iterations.
Out[ ]:
<catboost.core.CatBoostClassifier at 0x7f91c4c5a240>
In [ ]:
# Refit the current estimator_cb on `features`/`labels` restricted to
# good_columns2. No eval_set is passed here, so only the learn metric is logged
# (every 10 iterations, per verbose=10).
estimator_cb.fit(cat_prep(features[good_columns2],cat_columns2), labels, cat_features = cat_columns2,verbose=10, plot=True)                 
# Score the test set with the same column subset and the same cat_prep step.
probs = estimator_cb.predict_proba(cat_prep(test_data[good_columns2],cat_columns2))
# Column 1 of predict_proba is written out -- presumably the positive (churn)
# class; confirm against the label order.
write_to_submission_file(probs[:,1],out_file='submission_cb7.csv')
0:	learn: 0.5412246	total: 82.4ms	remaining: 31.2s
10:	learn: 0.5531063	total: 797ms	remaining: 26.7s
20:	learn: 0.5589820	total: 1.51s	remaining: 25.8s
30:	learn: 0.6301104	total: 2.21s	remaining: 24.8s
40:	learn: 0.6961288	total: 2.99s	remaining: 24.7s
50:	learn: 0.7127787	total: 3.74s	remaining: 24.2s
60:	learn: 0.7224343	total: 4.54s	remaining: 23.7s
70:	learn: 0.7275994	total: 5.37s	remaining: 23.4s
80:	learn: 0.7310714	total: 6.12s	remaining: 22.6s
90:	learn: 0.7351380	total: 6.9s	remaining: 21.9s
100:	learn: 0.7384093	total: 7.68s	remaining: 21.2s
110:	learn: 0.7414550	total: 8.49s	remaining: 20.6s
120:	learn: 0.7431847	total: 9.27s	remaining: 19.8s
130:	learn: 0.7458488	total: 10.1s	remaining: 19.1s
140:	learn: 0.7479162	total: 10.8s	remaining: 18.3s
150:	learn: 0.7493381	total: 11.6s	remaining: 17.6s
160:	learn: 0.7501817	total: 12.3s	remaining: 16.8s
170:	learn: 0.7508208	total: 13.1s	remaining: 16s
180:	learn: 0.7518307	total: 13.8s	remaining: 15.2s
190:	learn: 0.7525224	total: 14.6s	remaining: 14.4s
200:	learn: 0.7535045	total: 15.4s	remaining: 13.7s
210:	learn: 0.7543939	total: 16.1s	remaining: 12.9s
220:	learn: 0.7551419	total: 16.9s	remaining: 12.2s
230:	learn: 0.7564417	total: 17.7s	remaining: 11.4s
240:	learn: 0.7572993	total: 18.5s	remaining: 10.7s
250:	learn: 0.7577916	total: 19.2s	remaining: 9.88s
260:	learn: 0.7585564	total: 20s	remaining: 9.12s
270:	learn: 0.7594474	total: 20.8s	remaining: 8.36s
280:	learn: 0.7598921	total: 21.6s	remaining: 7.59s
290:	learn: 0.7614507	total: 22.3s	remaining: 6.82s
300:	learn: 0.7621530	total: 23.1s	remaining: 6.06s
310:	learn: 0.7631922	total: 23.9s	remaining: 5.3s
320:	learn: 0.7639874	total: 24.6s	remaining: 4.53s
330:	learn: 0.7647478	total: 25.3s	remaining: 3.75s
340:	learn: 0.7653367	total: 26.1s	remaining: 2.98s
350:	learn: 0.7656501	total: 26.8s	remaining: 2.21s
360:	learn: 0.7668285	total: 27.5s	remaining: 1.45s
370:	learn: 0.7670237	total: 28.2s	remaining: 684ms
379:	learn: 0.7675110	total: 28.8s	remaining: 0us
In [ ]:
# Validation run: CatBoost on GPU, 380 boosting iterations, AUC as the
# evaluation metric, ordered boosting, Bernoulli bootstrap with subsample=0.8.
estimator_cb = CatBoostClassifier(task_type='GPU',iterations=380,random_state=0,
                                  eval_metric = 'AUC',
                                  learning_rate=0.08,
                                  boosting_type = 'Ordered',
                                  bootstrap_type='Bernoulli',
                                  subsample=0.8,
                                  one_hot_max_size=10,                                  
                                  leaf_estimation_iterations=10,
                                  max_ctr_complexity=4
                                  )

# Fit on the train split (good_columns only) and track AUC on the validation
# split supplied as eval_set; progress is logged every 10 iterations.
estimator_cb.fit(cat_prep(feats_train[good_columns],cat_columns), labels_train, cat_features = cat_columns,verbose=10, plot=True,
                 eval_set = (cat_prep(feats_val[good_columns],cat_columns), labels_val))
0:	learn: 0.5447010	test: 0.5331667	best: 0.5331667 (0)	total: 96.5ms	remaining: 36.6s
10:	learn: 0.5710723	test: 0.5727358	best: 0.5727358 (10)	total: 857ms	remaining: 28.7s
20:	learn: 0.5846300	test: 0.5855039	best: 0.5855039 (20)	total: 1.65s	remaining: 28.3s
30:	learn: 0.6770678	test: 0.6701294	best: 0.6701294 (30)	total: 2.47s	remaining: 27.8s
40:	learn: 0.7173511	test: 0.7078972	best: 0.7078972 (40)	total: 3.37s	remaining: 27.8s
50:	learn: 0.7262440	test: 0.7148843	best: 0.7148843 (50)	total: 4.24s	remaining: 27.4s
60:	learn: 0.7315806	test: 0.7184889	best: 0.7185946 (57)	total: 5.12s	remaining: 26.8s
70:	learn: 0.7366703	test: 0.7222066	best: 0.7223936 (67)	total: 5.99s	remaining: 26.1s
80:	learn: 0.7419034	test: 0.7254476	best: 0.7257717 (79)	total: 6.89s	remaining: 25.4s
90:	learn: 0.7466782	test: 0.7281565	best: 0.7283261 (89)	total: 7.72s	remaining: 24.5s
100:	learn: 0.7488860	test: 0.7292999	best: 0.7293321 (99)	total: 8.63s	remaining: 23.8s
110:	learn: 0.7522508	test: 0.7307438	best: 0.7307438 (110)	total: 9.47s	remaining: 23s
120:	learn: 0.7536407	test: 0.7312691	best: 0.7313125 (119)	total: 10.3s	remaining: 22.1s
130:	learn: 0.7552121	test: 0.7314820	best: 0.7319006 (126)	total: 11.2s	remaining: 21.2s
140:	learn: 0.7565901	test: 0.7323828	best: 0.7323828 (140)	total: 12s	remaining: 20.3s
150:	learn: 0.7579570	test: 0.7325053	best: 0.7325128 (142)	total: 12.8s	remaining: 19.4s
160:	learn: 0.7588528	test: 0.7319398	best: 0.7325128 (142)	total: 13.6s	remaining: 18.6s
170:	learn: 0.7594941	test: 0.7319781	best: 0.7325128 (142)	total: 14.5s	remaining: 17.7s
180:	learn: 0.7605938	test: 0.7318513	best: 0.7325128 (142)	total: 15.3s	remaining: 16.8s
190:	learn: 0.7613095	test: 0.7318365	best: 0.7325128 (142)	total: 16.1s	remaining: 15.9s
200:	learn: 0.7620048	test: 0.7316931	best: 0.7325128 (142)	total: 16.9s	remaining: 15s
210:	learn: 0.7633401	test: 0.7315574	best: 0.7325128 (142)	total: 17.7s	remaining: 14.2s
220:	learn: 0.7643434	test: 0.7314584	best: 0.7325128 (142)	total: 18.6s	remaining: 13.3s
230:	learn: 0.7653988	test: 0.7316711	best: 0.7325128 (142)	total: 19.4s	remaining: 12.5s
240:	learn: 0.7659969	test: 0.7316545	best: 0.7325128 (142)	total: 20.2s	remaining: 11.6s
250:	learn: 0.7664981	test: 0.7316199	best: 0.7325128 (142)	total: 21s	remaining: 10.8s
260:	learn: 0.7675496	test: 0.7315992	best: 0.7325128 (142)	total: 21.8s	remaining: 9.94s
270:	learn: 0.7685535	test: 0.7313873	best: 0.7325128 (142)	total: 22.7s	remaining: 9.11s
280:	learn: 0.7693712	test: 0.7310309	best: 0.7325128 (142)	total: 23.4s	remaining: 8.26s
290:	learn: 0.7704536	test: 0.7318513	best: 0.7325128 (142)	total: 24.2s	remaining: 7.42s
300:	learn: 0.7706518	test: 0.7315752	best: 0.7325128 (142)	total: 25s	remaining: 6.55s
310:	learn: 0.7711117	test: 0.7315986	best: 0.7325128 (142)	total: 25.8s	remaining: 5.71s
320:	learn: 0.7716607	test: 0.7313619	best: 0.7325128 (142)	total: 26.5s	remaining: 4.87s
330:	learn: 0.7725669	test: 0.7317505	best: 0.7325128 (142)	total: 27.2s	remaining: 4.03s
340:	learn: 0.7729231	test: 0.7315090	best: 0.7325128 (142)	total: 28s	remaining: 3.21s
350:	learn: 0.7738827	test: 0.7315513	best: 0.7325128 (142)	total: 28.8s	remaining: 2.38s
360:	learn: 0.7747361	test: 0.7319160	best: 0.7325128 (142)	total: 29.7s	remaining: 1.56s
370:	learn: 0.7756793	test: 0.7323199	best: 0.7325128 (142)	total: 30.5s	remaining: 740ms
379:	learn: 0.7766857	test: 0.7320912	best: 0.7325128 (142)	total: 31.3s	remaining: 0us
bestTest = 0.7325127721
bestIteration = 142
Shrink model to first 143 iterations.
Out[ ]:
<catboost.core.CatBoostClassifier at 0x7f91b877d828>
In [ ]:
# Build a CatBoost Pool from the train split (same columns and cat_prep step as
# used for fitting) so that feature importance can be evaluated on it.
train_pool = Pool(cat_prep(feats_train[good_columns],cat_columns), labels_train, cat_features = cat_columns)
# 'LossFunctionChange' importance for the fitted estimator_cb; wrapped in
# np.array so the prettified table is displayed as (feature, value) rows.
np.array(estimator_cb.get_feature_importance(train_pool,'LossFunctionChange', prettified=True))
Out[ ]:
array([['Var126', 0.010024201668431524],
       ['Var189', 0.0013942264248186557],
       ['Var113', 0.0010710856048719852],
       ['Var205', 0.0010180821630570343],
       ['Var218', 0.0009153584653482372],
       ['Var73', 0.0007406078610784165],
       ['Var81', 0.0006467163946333732],
       ['Var7', 0.00043778510848130336],
       ['Var74', 0.0004341491266758637],
       ['Var210', 0.000412991734865531],
       ['Var229', 0.00040290717966234846],
       ['Var228', 0.00036472501119487005],
       ['Var227', 0.000263829562905595],
       ['Var226', 0.00015396149885077887],
       ['Var28', 7.070645847293926e-05],
       ['Var6', 4.118281413373337e-05],
       ['Var195', 3.351086565972494e-05],
       ['Var57', 3.3187765988651474e-05],
       ['Var22', 2.9433429972525182e-05],
       ['Var13', 1.0434922235347255e-05],
       ['Var153', 1.005580138302431e-05],
       ['Var132', 9.405601493928106e-06],
       ['Var223', 8.770249852574352e-06],
       ['Var85', 5.538518083159083e-06],
       ['Var24', 0.0],
       ['Var25', 0.0],
       ['Var38', 0.0],
       ['Var76', 0.0],
       ['Var83', 0.0],
       ['Var94', 0.0],
       ['Var109', 0.0],
       ['Var112', 0.0],
       ['Var119', 0.0],
       ['Var123', 0.0],
       ['Var125', 0.0],
       ['Var133', 0.0],
       ['Var134', 0.0],
       ['Var140', 0.0],
       ['Var149', 0.0],
       ['Var160', 0.0],
       ['Var163', 0.0],
       ['Var44', 0.0],
       ['Var72', 0.0],
       ['Var143', 0.0],
       ['Var144', 0.0],
       ['Var173', 0.0],
       ['Var181', 0.0],
       ['Var196', 0.0],
       ['Var203', 0.0],
       ['Var208', 0.0],
       ['Var211', 0.0],
       ['Var221', 0.0],
       ['Var225', 0.0],
       ['Var222', -2.6252718101036736e-06],
       ['Var21', -6.2364672374082275e-06],
       ['Var198', -0.00010255833881910499],
       ['Var193', -0.00017332622385426524],
       ['Var78', -0.00034546029903722664],
       ['Var65', -0.0003603076190034221],
       ['Var220', -0.0003755514450442717],
       ['Var219', -0.0006858344408631478],
       ['Var216', -0.0009654871276644217],
       ['Var204', -0.0010022270653589488],
       ['Var35', -0.0011273031559189545],
       ['Var197', -0.0015999774877056838],
       ['Var207', -0.0017051645723122435],
       ['Var206', -0.001783177137175726],
       ['Var212', -0.0018343275265224473],
       ['Var202', -0.0029542948336417074],
       ['Var192', -0.0035754854623927337],
       ['Var217', -0.005039384810022559],
       ['Var199', -0.007050946416850501]], dtype=object)
In [ ]:
# Work on a copy so the original cat_columns list stays intact.
cat_columns2 = cat_columns.copy()
# Drop Var199 and Var217 -- the two features with the most negative
# LossFunctionChange values in the importance table printed above.
# list.remove raises ValueError if a name is not in the list.
for var in ['Var199','Var217']:
  cat_columns2.remove(var)
# Reduced feature set: all numeric columns plus the remaining categorical ones.
good_columns2 = num_columns + cat_columns2 

# Same CatBoost configuration as the validation run, but with 200 iterations.
estimator_cb = CatBoostClassifier(task_type='GPU',iterations=200,random_state=0,
                                  eval_metric = 'AUC',
                                  learning_rate=0.08,
                                  boosting_type = 'Ordered',
                                  bootstrap_type='Bernoulli',
                                  subsample=0.8,
                                  one_hot_max_size=10,                                  
                                  leaf_estimation_iterations=10,
                                  max_ctr_complexity=4
                                  )

# Fit on `features`/`labels` (no eval_set), then score the test set.
estimator_cb.fit(cat_prep(features[good_columns2],cat_columns2), labels, cat_features = cat_columns2,verbose=10, plot=True)                 
probs = estimator_cb.predict_proba(cat_prep(test_data[good_columns2],cat_columns2))
# Column 1 of predict_proba goes to the submission file -- presumably the
# positive (churn) class; confirm against the label order.
write_to_submission_file(probs[:,1],out_file='submission_cb8.csv')
0:	learn: 0.5080304	total: 74ms	remaining: 14.7s
10:	learn: 0.5556120	total: 838ms	remaining: 14.4s
20:	learn: 0.5598488	total: 1.59s	remaining: 13.6s
30:	learn: 0.6327914	total: 2.34s	remaining: 12.7s
40:	learn: 0.6817296	total: 3.13s	remaining: 12.1s
50:	learn: 0.7132914	total: 3.98s	remaining: 11.6s
60:	learn: 0.7185804	total: 4.84s	remaining: 11s
70:	learn: 0.7271850	total: 5.65s	remaining: 10.3s
80:	learn: 0.7320981	total: 6.47s	remaining: 9.51s
90:	learn: 0.7356150	total: 7.36s	remaining: 8.82s
100:	learn: 0.7396625	total: 8.17s	remaining: 8.01s
110:	learn: 0.7440588	total: 9.02s	remaining: 7.23s
120:	learn: 0.7458944	total: 9.84s	remaining: 6.43s
130:	learn: 0.7482274	total: 10.6s	remaining: 5.6s
140:	learn: 0.7504014	total: 11.4s	remaining: 4.78s
150:	learn: 0.7512612	total: 12.2s	remaining: 3.94s
160:	learn: 0.7520340	total: 12.9s	remaining: 3.13s
170:	learn: 0.7528192	total: 13.7s	remaining: 2.33s
180:	learn: 0.7535923	total: 14.5s	remaining: 1.52s
190:	learn: 0.7550347	total: 15.3s	remaining: 723ms
199:	learn: 0.7556133	total: 16s	remaining: 0us
In [ ]:
# Base names (without the .csv extension) of the submission files to blend.
sub_names = ['submission_cb1','submission_cb2','submission_cb3','submission_cb4','submission_cb5','submission_cb6','submission_cb7',
             'submission_cb8','submission3','submission4','submission5','submission6','submission7','submission8','submission9','submission_final']
submission_list = []
for submission_name in sub_names:
  # Each file is read from the current working directory; its first column
  # becomes the index.
  sub = pd.read_csv(f'{submission_name}.csv',index_col=0)
  submission_list.append(sub) 
# Place the submissions side by side, aligned on the index. The frames keep
# their own column names, so the result has repeated column labels (see the
# 'result' headers in the displayed head).
sub_df = pd.concat(submission_list,axis=1)
sub_df.head()
Out[ ]:
result result result result result result result result result result result result result result result result
Id
0 0.092013 0.080946 0.081988 0.081819 0.082297 0.245850 0.063512 0.067114 0.064954 0.033749 0.466653 0.263725 0.106894 0.063221 0.060926 0.060926
1 0.108802 0.112910 0.111290 0.096106 0.098092 0.282603 0.112622 0.093682 0.117818 0.056393 0.594583 0.411401 0.098931 0.105554 0.108696 0.108696
2 0.019675 0.020165 0.020478 0.021503 0.020924 0.070057 0.024609 0.026490 0.028061 0.012662 0.240480 0.142153 0.033888 0.023810 0.025996 0.025996
3 0.060231 0.066768 0.060720 0.053580 0.068739 0.212873 0.077320 0.082234 0.078644 0.044644 0.510958 0.326529 0.126811 0.083559 0.082151 0.082151
4 0.027637 0.022772 0.023751 0.024252 0.026784 0.092238 0.029508 0.034319 0.021621 0.012318 0.152059 0.101077 0.032756 0.021486 0.023285 0.023285
In [ ]:
# Row-wise unweighted mean over all columns currently in sub_df, stored as a
# new 'mean' column.
# NOTE(review): in the displayed head the last two submission columns hold
# identical values; if those files are duplicates, that model is counted twice
# in the mean -- confirm this is intended.
sub_df['mean'] = sub_df.mean(axis=1)
sub_df.head()
Out[ ]:
result result result result result result result result result result result result result result result result mean
Id
0 0.092013 0.080946 0.081988 0.081819 0.082297 0.245850 0.063512 0.067114 0.064954 0.033749 0.466653 0.263725 0.106894 0.063221 0.060926 0.060926 0.119787
1 0.108802 0.112910 0.111290 0.096106 0.098092 0.282603 0.112622 0.093682 0.117818 0.056393 0.594583 0.411401 0.098931 0.105554 0.108696 0.108696 0.163636
2 0.019675 0.020165 0.020478 0.021503 0.020924 0.070057 0.024609 0.026490 0.028061 0.012662 0.240480 0.142153 0.033888 0.023810 0.025996 0.025996 0.047309
3 0.060231 0.066768 0.060720 0.053580 0.068739 0.212873 0.077320 0.082234 0.078644 0.044644 0.510958 0.326529 0.126811 0.083559 0.082151 0.082151 0.126120
4 0.027637 0.022772 0.023751 0.024252 0.026784 0.092238 0.029508 0.034319 0.021621 0.012318 0.152059 0.101077 0.032756 0.021486 0.023285 0.023285 0.041822
In [ ]:
# Write the blended predictions. The double brackets select a one-column
# DataFrame, so .values is a 2-D (n, 1) array, whereas the other calls in this
# notebook pass the 1-D probs[:,1].
# NOTE(review): confirm write_to_submission_file handles both shapes.
write_to_submission_file(sub_df[['mean']].values,out_file='submission_mean1.csv')

image.png

In [ ]:
# Indices of the significant features after preprocessing
indices_lists = data_preprocessor(feats_train,params=[0.7, 0.5, 30])# params = [prop_nan,max_prop_unique,N_lim]


# Missing-value imputation
# Numeric features
numeric_imputer = impute.SimpleImputer()# fills with the mean value by default
numeric_scaler = None

# Categorical features
occurrence_med_prop = 0.1# category size as a fraction of the feature's median value; such categories are later merged into one
cat_imputer1 =  MyCatMergerNaInputer(occurrence_med_prop = occurrence_med_prop,fill_value='Hi')
cat_imputer2 =  MyCatMergerNaInputer(occurrence_med_prop = occurrence_med_prop,fill_value='Hi')

# Encoders for categorical features: both the low- and the high-cardinality
# group use one-hot encoding that ignores categories unseen during fit.
(low_cardinality_cat_transformer1,high_cardinality_cat_transformer1) = (preprocessing.OneHotEncoder(handle_unknown = 'ignore'),
                                                                        preprocessing.OneHotEncoder(handle_unknown = 'ignore'))
                                                                        
# List of low-level transformers
low_level_transformers = [numeric_imputer,numeric_scaler,cat_imputer1,cat_imputer2,\
                low_cardinality_cat_transformer1,high_cardinality_cat_transformer1]
# Final transformer
transformer = transformation(low_level_transformers,indices_lists)
In [ ]:
# LightGBM classifier: 100 trees with max_depth=3.
clf = lgb.LGBMClassifier(random_state=42,n_estimators=100,max_depth=3)
    
# Combine the preprocessing transformer and the classifier in the project's
# Estimator wrapper (defined elsewhere in the notebook).
estimator = Estimator(transformer,clf)
In [ ]:
# Suppress all Python warnings from this point on (a global filter, not
# limited to this cell).
warnings.filterwarnings('ignore')

# Train on the train split, passed as raw arrays.
estimator.fit(feats_train.values,labels_train.values)

# Class probabilities for the validation split; column 1 feeds the metrics.
prb = estimator.predict_proba(feats_val.values)                                                                          
                                                                
print(f'ROC AUC: {roc_auc_score(labels_val.values,prb[:,1])}')    
print(f'PRC AUC: {average_precision_score(labels_val.values,prb[:,1])}')   
                                                                        
# Hard class predictions for the per-class precision/recall report.
pred = estimator.predict(feats_val.values)
print(classification_report(labels_val.values,pred))  
print('------------------')
print('------------------')

# pr_plot is a project helper; the list is presumably a set of probability
# thresholds to evaluate -- confirm against its definition.
pr_plot(labels_val.values,prb,[0.05,0.1,0.2,0.25,0.3,0.35,0.4,0.5,0.6,0.7,0.8,0.9,0.95,0.96,0.97,0.99])
ROC AUC: 0.7402992130604562
PRC AUC: 0.2129460031700871
              precision    recall  f1-score   support

          -1       0.93      1.00      0.96     11107
           1       0.53      0.01      0.02       893

    accuracy                           0.93     12000
   macro avg       0.73      0.50      0.49     12000
weighted avg       0.90      0.93      0.89     12000

------------------
------------------
In [ ]:
# Fresh LightGBM classifier (100 trees, max_depth=3) for the submission model.
clf = lgb.LGBMClassifier(random_state=42,n_estimators=100,max_depth=3)
    
estimator = Estimator(transformer,clf)

# Fit on `features`/`labels`, then score the test set.
estimator.fit(features.values, labels.values)                 
probs = estimator.predict_proba(test_data.values)
# Column 1 of predict_proba is written out -- presumably the positive (churn)
# class; confirm against the label order.
write_to_submission_file(probs[:,1],out_file='submission_lgb1.csv')
In [ ]:
# Second LightGBM configuration: 200 trees, max_depth=-1, learning rate 0.05,
# and reg_alpha / reg_lambda both set to 0.05.
clf = lgb.LGBMClassifier(random_state=42,n_estimators=200,max_depth=-1, learning_rate=0.05,reg_alpha=0.05,reg_lambda=0.05)
    
estimator = Estimator(transformer,clf)

# Suppress all Python warnings (global filter).
warnings.filterwarnings('ignore')

# Train on the train split, passed as raw arrays.
estimator.fit(feats_train.values,labels_train.values)

# Class probabilities for the validation split; column 1 feeds the metrics.
prb = estimator.predict_proba(feats_val.values)                                                                          
                                                                
print(f'ROC AUC: {roc_auc_score(labels_val.values,prb[:,1])}')    
print(f'PRC AUC: {average_precision_score(labels_val.values,prb[:,1])}')   
                                                                        
# Hard class predictions for the per-class precision/recall report.
pred = estimator.predict(feats_val.values)
print(classification_report(labels_val.values,pred))  
print('------------------')
print('------------------')

# pr_plot is a project helper; the list is presumably a set of probability
# thresholds to evaluate -- confirm against its definition.
pr_plot(labels_val.values,prb,[0.05,0.1,0.2,0.25,0.3,0.35,0.4,0.5,0.6,0.7,0.8,0.9,0.95,0.96,0.97,0.99])
ROC AUC: 0.7304670309201413
PRC AUC: 0.2164439617993148
              precision    recall  f1-score   support

          -1       0.93      1.00      0.96     11107
           1       0.60      0.04      0.07       893

    accuracy                           0.93     12000
   macro avg       0.77      0.52      0.51     12000
weighted avg       0.90      0.93      0.90     12000

------------------
------------------
In [ ]:
# Same LightGBM configuration as the preceding validation cell, rebuilt so the
# submission model starts from an unfitted classifier.
clf = lgb.LGBMClassifier(random_state=42,n_estimators=200,max_depth=-1, learning_rate=0.05,reg_alpha=0.05,reg_lambda=0.05)
    
estimator = Estimator(transformer,clf)

# Fit on `features`/`labels`, then score the test set.
estimator.fit(features.values, labels.values)                 
probs = estimator.predict_proba(test_data.values)
# Column 1 of predict_proba is written out -- presumably the positive (churn)
# class; confirm against the label order.
write_to_submission_file(probs[:,1],out_file='submission_lgb2.csv')

image.png

In [ ]:
# Validation run: CatBoost on GPU, 380 boosting iterations, AUC as the
# evaluation metric, ordered boosting, Bernoulli bootstrap with subsample=0.8.
estimator_cb = CatBoostClassifier(task_type='GPU',iterations=380,random_state=0,
                                  eval_metric = 'AUC',
                                  learning_rate=0.08,
                                  boosting_type = 'Ordered',
                                  bootstrap_type='Bernoulli',
                                  subsample=0.8,
                                  one_hot_max_size=10,                                  
                                  leaf_estimation_iterations=10,
                                  max_ctr_complexity=4,
                                  )

# Fit on the train split and track AUC on the validation split (eval_set).
estimator_cb.fit(cat_prep(feats_train[good_columns],cat_columns), labels_train, cat_features = cat_columns,verbose=10, plot=True,
                 eval_set = (cat_prep(feats_val[good_columns],cat_columns), labels_val))
# Class probabilities for the same validation split; column 1 feeds the metrics.
prb = estimator_cb.predict_proba(cat_prep(feats_val[good_columns],cat_columns))                                                                          
                                                                
print(f'ROC AUC: {roc_auc_score(labels_val.values,prb[:,1])}')    
print(f'PRC AUC: {average_precision_score(labels_val.values,prb[:,1])}')   
                                                                        
# Hard class predictions for the per-class precision/recall report.
pred = estimator_cb.predict(cat_prep(feats_val[good_columns],cat_columns))
print(classification_report(labels_val,pred))  
print('------------------')
print('------------------')

# pr_plot is a project helper; the list is presumably a set of probability
# thresholds to evaluate -- confirm against its definition.
pr_plot(labels_val.values,prb,[0.05,0.1,0.2,0.25,0.3,0.35,0.4,0.5,0.6,0.7,0.8,0.9,0.95,0.96,0.97,0.99])
0:	learn: 0.5447010	test: 0.5331667	best: 0.5331667 (0)	total: 167ms	remaining: 1m 3s
10:	learn: 0.5710723	test: 0.5727358	best: 0.5727358 (10)	total: 1.37s	remaining: 46s
20:	learn: 0.5846300	test: 0.5855039	best: 0.5855039 (20)	total: 2.54s	remaining: 43.4s
30:	learn: 0.6770678	test: 0.6701294	best: 0.6701294 (30)	total: 3.76s	remaining: 42.4s
40:	learn: 0.7173510	test: 0.7078972	best: 0.7078972 (40)	total: 5.14s	remaining: 42.5s
50:	learn: 0.7262440	test: 0.7148843	best: 0.7148843 (50)	total: 6.48s	remaining: 41.8s
60:	learn: 0.7315806	test: 0.7184889	best: 0.7185946 (57)	total: 7.83s	remaining: 41s
70:	learn: 0.7366703	test: 0.7222066	best: 0.7223936 (67)	total: 9.18s	remaining: 39.9s
80:	learn: 0.7419034	test: 0.7254476	best: 0.7257717 (79)	total: 10.5s	remaining: 38.8s
90:	learn: 0.7466774	test: 0.7281559	best: 0.7283258 (89)	total: 11.8s	remaining: 37.4s
100:	learn: 0.7488852	test: 0.7292996	best: 0.7293316 (99)	total: 13.2s	remaining: 36.3s
110:	learn: 0.7522504	test: 0.7307437	best: 0.7307437 (110)	total: 14.4s	remaining: 35s
120:	learn: 0.7536410	test: 0.7312689	best: 0.7313123 (119)	total: 15.7s	remaining: 33.6s
130:	learn: 0.7552153	test: 0.7314774	best: 0.7318964 (126)	total: 16.9s	remaining: 32.2s
140:	learn: 0.7565937	test: 0.7323776	best: 0.7323776 (140)	total: 18.2s	remaining: 30.8s
150:	learn: 0.7579603	test: 0.7324996	best: 0.7325070 (142)	total: 19.4s	remaining: 29.5s
160:	learn: 0.7588569	test: 0.7319368	best: 0.7325070 (142)	total: 20.7s	remaining: 28.2s
170:	learn: 0.7594981	test: 0.7319732	best: 0.7325070 (142)	total: 21.9s	remaining: 26.8s
180:	learn: 0.7605974	test: 0.7318482	best: 0.7325070 (142)	total: 23.1s	remaining: 25.4s
190:	learn: 0.7613125	test: 0.7318316	best: 0.7325070 (142)	total: 24.4s	remaining: 24.1s
200:	learn: 0.7620087	test: 0.7316887	best: 0.7325070 (142)	total: 25.6s	remaining: 22.8s
210:	learn: 0.7633440	test: 0.7315555	best: 0.7325070 (142)	total: 26.9s	remaining: 21.5s
220:	learn: 0.7643475	test: 0.7314553	best: 0.7325070 (142)	total: 28.1s	remaining: 20.2s
230:	learn: 0.7654030	test: 0.7316672	best: 0.7325070 (142)	total: 29.4s	remaining: 19s
240:	learn: 0.7659992	test: 0.7316511	best: 0.7325070 (142)	total: 30.7s	remaining: 17.7s
250:	learn: 0.7664653	test: 0.7315881	best: 0.7325070 (142)	total: 31.8s	remaining: 16.4s
260:	learn: 0.7677080	test: 0.7318567	best: 0.7325070 (142)	total: 33.1s	remaining: 15.1s
270:	learn: 0.7681869	test: 0.7313173	best: 0.7325070 (142)	total: 34.4s	remaining: 13.8s
280:	learn: 0.7684340	test: 0.7312691	best: 0.7325070 (142)	total: 35.5s	remaining: 12.5s
290:	learn: 0.7692183	test: 0.7307729	best: 0.7325070 (142)	total: 36.8s	remaining: 11.3s
300:	learn: 0.7701237	test: 0.7307494	best: 0.7325070 (142)	total: 38.1s	remaining: 10s
310:	learn: 0.7705575	test: 0.7304407	best: 0.7325070 (142)	total: 39.2s	remaining: 8.7s
320:	learn: 0.7711840	test: 0.7300714	best: 0.7325070 (142)	total: 40.4s	remaining: 7.42s
330:	learn: 0.7714806	test: 0.7296440	best: 0.7325070 (142)	total: 41.6s	remaining: 6.16s
340:	learn: 0.7721810	test: 0.7299536	best: 0.7325070 (142)	total: 42.8s	remaining: 4.89s
350:	learn: 0.7732477	test: 0.7302385	best: 0.7325070 (142)	total: 44s	remaining: 3.63s
360:	learn: 0.7735129	test: 0.7300039	best: 0.7325070 (142)	total: 45.1s	remaining: 2.38s
370:	learn: 0.7743750	test: 0.7300563	best: 0.7325070 (142)	total: 46.3s	remaining: 1.12s
379:	learn: 0.7751703	test: 0.7301626	best: 0.7325070 (142)	total: 47.4s	remaining: 0us
bestTest = 0.7325069904
bestIteration = 142
Shrink model to first 143 iterations.
ROC AUC: 0.7325070970548017
PRC AUC: 0.20172309738059707
              precision    recall  f1-score   support

          -1       0.93      1.00      0.96     11107
           1       0.60      0.01      0.01       893

    accuracy                           0.93     12000
   macro avg       0.76      0.50      0.49     12000
weighted avg       0.90      0.93      0.89     12000

------------------
------------------
In [ ]:
# Validation run with a shorter schedule (120 iterations) and an explicit
# l2_leaf_reg=0.1; the other settings are AUC metric, ordered boosting and
# Bernoulli bootstrap with subsample=0.8.
estimator_cb = CatBoostClassifier(task_type='GPU',iterations=120,random_state=0,
                                  eval_metric = 'AUC',
                                  learning_rate=0.08,
                                  boosting_type = 'Ordered',
                                  bootstrap_type='Bernoulli',
                                  subsample=0.8,
                                  one_hot_max_size=10,                                  
                                  leaf_estimation_iterations=10,
                                  max_ctr_complexity=4,
                                  l2_leaf_reg=0.1
                                  )

# Fit on the train split and track AUC on the validation split (eval_set).
estimator_cb.fit(cat_prep(feats_train[good_columns],cat_columns), labels_train, cat_features = cat_columns,verbose=10, plot=True,
                 eval_set = (cat_prep(feats_val[good_columns],cat_columns), labels_val))
# Class probabilities for the same validation split; column 1 feeds the metrics.
prb = estimator_cb.predict_proba(cat_prep(feats_val[good_columns],cat_columns))                                                                          
                                                                
print(f'ROC AUC: {roc_auc_score(labels_val.values,prb[:,1])}')    
print(f'PRC AUC: {average_precision_score(labels_val.values,prb[:,1])}')   
                                                                        
# Hard class predictions for the per-class precision/recall report.
pred = estimator_cb.predict(cat_prep(feats_val[good_columns],cat_columns))
print(classification_report(labels_val,pred))  
print('------------------')
print('------------------')

# pr_plot is a project helper; the list is presumably a set of probability
# thresholds to evaluate -- confirm against its definition.
pr_plot(labels_val.values,prb,[0.05,0.1,0.2,0.25,0.3,0.35,0.4,0.5,0.6,0.7,0.8,0.9,0.95,0.96,0.97,0.99])
0:	learn: 0.5447010	test: 0.5331667	best: 0.5331667 (0)	total: 129ms	remaining: 15.3s
10:	learn: 0.5692714	test: 0.5710864	best: 0.5710864 (10)	total: 1.26s	remaining: 12.5s
20:	learn: 0.5857139	test: 0.5863981	best: 0.5863981 (20)	total: 2.43s	remaining: 11.5s
30:	learn: 0.6447096	test: 0.6439496	best: 0.6439496 (30)	total: 3.63s	remaining: 10.4s
40:	learn: 0.7052917	test: 0.7006139	best: 0.7006139 (40)	total: 4.93s	remaining: 9.51s
50:	learn: 0.7185763	test: 0.7098187	best: 0.7098187 (50)	total: 6.29s	remaining: 8.51s
60:	learn: 0.7354473	test: 0.7220390	best: 0.7220390 (60)	total: 7.64s	remaining: 7.39s
70:	learn: 0.7427671	test: 0.7266952	best: 0.7266952 (70)	total: 8.95s	remaining: 6.17s
80:	learn: 0.7450238	test: 0.7274090	best: 0.7277957 (74)	total: 10.2s	remaining: 4.93s
90:	learn: 0.7472610	test: 0.7272773	best: 0.7278890 (85)	total: 11.5s	remaining: 3.66s
100:	learn: 0.7496955	test: 0.7279947	best: 0.7279947 (100)	total: 12.8s	remaining: 2.4s
110:	learn: 0.7510720	test: 0.7281052	best: 0.7283451 (108)	total: 14.1s	remaining: 1.14s
119:	learn: 0.7523189	test: 0.7282507	best: 0.7285211 (116)	total: 15.2s	remaining: 0us
bestTest = 0.7285211086
bestIteration = 116
Shrink model to first 117 iterations.
ROC AUC: 0.7285211317661219
PRC AUC: 0.1982602557230022
              precision    recall  f1-score   support

          -1       0.93      1.00      0.96     11107
           1       0.57      0.00      0.01       893

    accuracy                           0.93     12000
   macro avg       0.75      0.50      0.49     12000
weighted avg       0.90      0.93      0.89     12000

------------------
------------------
In [ ]:
# Submission model: 120 iterations with l2_leaf_reg=0.1, AUC metric, ordered
# boosting and Bernoulli bootstrap with subsample=0.8.
estimator_cb = CatBoostClassifier(task_type='GPU',iterations=120,random_state=0,
                                  eval_metric = 'AUC',
                                  learning_rate=0.08,
                                  boosting_type = 'Ordered',
                                  bootstrap_type='Bernoulli',
                                  subsample=0.8,
                                  one_hot_max_size=10,                                  
                                  leaf_estimation_iterations=10,
                                  max_ctr_complexity=4,
                                  l2_leaf_reg=0.1
                                  )

# Fit on `features`/`labels` (no eval_set), then score the test set.
estimator_cb.fit(cat_prep(features[good_columns],cat_columns), labels, cat_features = cat_columns,verbose=10, plot=True)                 
probs = estimator_cb.predict_proba(cat_prep(test_data[good_columns],cat_columns))
# Column 1 of predict_proba is written out -- presumably the positive (churn)
# class; confirm against the label order.
write_to_submission_file(probs[:,1],out_file='submission_cb9.csv')
0:	learn: 0.5667429	total: 136ms	remaining: 16.2s
10:	learn: 0.5751854	total: 1.37s	remaining: 13.6s
20:	learn: 0.5822270	total: 2.49s	remaining: 11.7s
30:	learn: 0.6431841	total: 3.66s	remaining: 10.5s
40:	learn: 0.6941811	total: 4.9s	remaining: 9.45s
50:	learn: 0.7052810	total: 6.19s	remaining: 8.38s
60:	learn: 0.7162427	total: 7.38s	remaining: 7.14s
70:	learn: 0.7279038	total: 8.63s	remaining: 5.96s
80:	learn: 0.7326342	total: 9.87s	remaining: 4.75s
90:	learn: 0.7359138	total: 11.1s	remaining: 3.54s
100:	learn: 0.7392089	total: 12.4s	remaining: 2.32s
110:	learn: 0.7429389	total: 13.7s	remaining: 1.11s
119:	learn: 0.7445455	total: 14.8s	remaining: 0us

image.png

In [19]:
def normed_fe_interaction(dataset, level=3, max_feats=10, num_columns=None):
  """Append interaction features built from combinations of existing columns.

  For each of the first ``max_feats`` combinations of ``level`` columns taken
  from ``num_columns`` (in ``itertools.combinations`` order), every column is
  standardized as ``(x - mean) / std`` and the new feature is the sum of all
  pairwise products of the standardized columns. The feature is named
  ``'<col1>,<col2>,..._mix'``.

  NOTE: mean and std are computed from ``dataset`` itself, so frames passed in
  separate calls (e.g. train and test) are standardized with their own
  statistics.

  Args:
    dataset: source DataFrame; it is not modified.
    level: number of columns per combination, must be >= 2. Any value >= 2 is
      supported (previously only 2..5 worked).
    max_feats: maximum number of features to create (non-negative int, or
      None for no limit).
    num_columns: labels of the columns to combine; None or empty adds nothing.

  Returns:
    A new DataFrame: the original columns followed by the created features.

  Raises:
    ValueError: if ``level`` is smaller than 2.
  """
  if level < 2:
    raise ValueError(f'level must be >= 2, got {level}')

  columns = [] if num_columns is None else list(num_columns)
  data = dataset[columns]

  # Each column is standardized at most once, and only if a combination uses it.
  zscores = {}

  def zscore(col):
    if col not in zscores:
      series = data[col]
      zscores[col] = (series - series.mean()) / series.std()
    return zscores[col]

  new_features = []
  # islice avoids materializing every combination when only a few are needed.
  combos = itertools.islice(itertools.combinations(data.columns, level), max_feats)
  for comb in combos:
    terms = [zscore(left) * zscore(right)
             for left, right in itertools.combinations(comb, 2)]
    # Left-to-right sum of the pairwise products; for level 4 the summation
    # order differs from the old hand-written one only in float rounding.
    feature = sum(terms[1:], terms[0])
    feature.name = ','.join(str(col) for col in comb) + '_mix'
    new_features.append(feature)

  return pd.concat([dataset] + new_features, axis=1)
In [ ]:
# Hold-out experiment: CatBoost on the selected columns plus 3-column
# interaction features, evaluated on the validation split.
cb_params = dict(
    task_type='GPU',
    iterations=380,
    random_state=0,
    eval_metric='AUC',
    learning_rate=0.08,
    boosting_type='Ordered',
    bootstrap_type='Bernoulli',
    subsample=0.8,
    one_hot_max_size=10,
    leaf_estimation_iterations=10,
    max_ctr_complexity=4,
)
estimator_cb = CatBoostClassifier(**cb_params)

# Interaction features are built separately for each split, so each split is
# standardized with its own mean/std inside normed_fe_interaction.
fe_kwargs = dict(level=3, max_feats=100, num_columns=num_columns)
data_train = cat_prep(normed_fe_interaction(feats_train[good_columns], **fe_kwargs), cat_columns)
data_val = cat_prep(normed_fe_interaction(feats_val[good_columns], **fe_kwargs), cat_columns)

estimator_cb.fit(
    data_train,
    labels_train,
    cat_features=cat_columns,
    verbose=10,
    plot=True,
    eval_set=(data_val, labels_val),
)

# Threshold-free metrics on the validation split (column 1 = second class).
prb = estimator_cb.predict_proba(data_val)
print(f'ROC AUC: {roc_auc_score(labels_val.values,prb[:,1])}')
print(f'PRC AUC: {average_precision_score(labels_val.values,prb[:,1])}')

# Metrics for the model's default hard predictions.
pred = estimator_cb.predict(data_val)
print(classification_report(labels_val, pred))
for _ in range(2):
  print('------------------')

thresholds = [0.05, 0.1, 0.2, 0.25, 0.3, 0.35, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.96, 0.97, 0.99]
pr_plot(labels_val.values, prb, thresholds)
0:	learn: 0.5447010	test: 0.5331667	best: 0.5331667 (0)	total: 175ms	remaining: 1m 6s
10:	learn: 0.5713693	test: 0.5665722	best: 0.5665722 (10)	total: 1.49s	remaining: 49.9s
20:	learn: 0.5779825	test: 0.5791406	best: 0.5791406 (20)	total: 2.73s	remaining: 46.7s
30:	learn: 0.6769742	test: 0.6673879	best: 0.6673879 (30)	total: 4.08s	remaining: 45.9s
40:	learn: 0.7180405	test: 0.7080745	best: 0.7080745 (40)	total: 5.45s	remaining: 45.1s
50:	learn: 0.7260944	test: 0.7136649	best: 0.7136649 (50)	total: 6.91s	remaining: 44.6s
60:	learn: 0.7337799	test: 0.7186048	best: 0.7187096 (57)	total: 8.34s	remaining: 43.6s
70:	learn: 0.7401711	test: 0.7226151	best: 0.7226151 (70)	total: 9.74s	remaining: 42.4s
80:	learn: 0.7428436	test: 0.7232714	best: 0.7232714 (80)	total: 11.2s	remaining: 41.3s
90:	learn: 0.7447283	test: 0.7236127	best: 0.7240006 (88)	total: 12.6s	remaining: 40.1s
100:	learn: 0.7474149	test: 0.7255445	best: 0.7255445 (100)	total: 14s	remaining: 38.8s
110:	learn: 0.7494779	test: 0.7268069	best: 0.7268069 (110)	total: 15.5s	remaining: 37.5s
120:	learn: 0.7513119	test: 0.7278796	best: 0.7279155 (119)	total: 16.9s	remaining: 36.1s
130:	learn: 0.7535687	test: 0.7280850	best: 0.7282544 (126)	total: 18.3s	remaining: 34.8s
140:	learn: 0.7546251	test: 0.7285581	best: 0.7285608 (138)	total: 19.6s	remaining: 33.3s
150:	learn: 0.7568277	test: 0.7302582	best: 0.7302582 (150)	total: 21s	remaining: 31.8s
160:	learn: 0.7583568	test: 0.7309818	best: 0.7309818 (160)	total: 22.3s	remaining: 30.4s
170:	learn: 0.7589253	test: 0.7312025	best: 0.7312025 (170)	total: 23.6s	remaining: 28.8s
180:	learn: 0.7603454	test: 0.7311414	best: 0.7314367 (177)	total: 24.9s	remaining: 27.4s
190:	learn: 0.7613244	test: 0.7307607	best: 0.7314367 (177)	total: 26.2s	remaining: 25.9s
200:	learn: 0.7633172	test: 0.7318498	best: 0.7320467 (194)	total: 27.6s	remaining: 24.6s
210:	learn: 0.7641692	test: 0.7312329	best: 0.7320467 (194)	total: 28.9s	remaining: 23.2s
220:	learn: 0.7651601	test: 0.7316284	best: 0.7320467 (194)	total: 30.3s	remaining: 21.8s
230:	learn: 0.7664787	test: 0.7315655	best: 0.7320467 (194)	total: 31.7s	remaining: 20.5s
240:	learn: 0.7673047	test: 0.7319013	best: 0.7320467 (194)	total: 33.1s	remaining: 19.1s
250:	learn: 0.7683206	test: 0.7323526	best: 0.7323526 (250)	total: 34.4s	remaining: 17.7s
260:	learn: 0.7693796	test: 0.7321006	best: 0.7323526 (250)	total: 35.7s	remaining: 16.3s
270:	learn: 0.7709666	test: 0.7326917	best: 0.7327129 (269)	total: 37.1s	remaining: 14.9s
280:	learn: 0.7714432	test: 0.7324358	best: 0.7327129 (269)	total: 38.4s	remaining: 13.5s
290:	learn: 0.7728767	test: 0.7323507	best: 0.7327242 (281)	total: 39.7s	remaining: 12.1s
300:	learn: 0.7740545	test: 0.7321448	best: 0.7327242 (281)	total: 41.1s	remaining: 10.8s
310:	learn: 0.7752417	test: 0.7320801	best: 0.7327242 (281)	total: 42.4s	remaining: 9.41s
320:	learn: 0.7756580	test: 0.7322249	best: 0.7327242 (281)	total: 43.7s	remaining: 8.03s
330:	learn: 0.7766075	test: 0.7324897	best: 0.7327242 (281)	total: 45s	remaining: 6.66s
340:	learn: 0.7769790	test: 0.7325286	best: 0.7327242 (281)	total: 46.2s	remaining: 5.29s
350:	learn: 0.7781356	test: 0.7324849	best: 0.7327242 (281)	total: 47.6s	remaining: 3.93s
360:	learn: 0.7784114	test: 0.7321509	best: 0.7327242 (281)	total: 48.8s	remaining: 2.57s
370:	learn: 0.7792056	test: 0.7321807	best: 0.7327242 (281)	total: 50.2s	remaining: 1.22s
379:	learn: 0.7796552	test: 0.7321535	best: 0.7327242 (281)	total: 51.4s	remaining: 0us
bestTest = 0.7327241898
bestIteration = 281
Shrink model to first 282 iterations.
ROC AUC: 0.732724064230753
PRC AUC: 0.2049685156849056
              precision    recall  f1-score   support

          -1       0.93      1.00      0.96     11107
           1       0.52      0.01      0.03       893

    accuracy                           0.93     12000
   macro avg       0.72      0.51      0.49     12000
weighted avg       0.90      0.93      0.89     12000

------------------
------------------
In [ ]:
# CatBoost with pairwise (level=2) interaction features, fitted on
# `features`/`labels` and used to produce a submission.
cb_params = dict(
    task_type='GPU',
    iterations=380,
    random_state=0,
    eval_metric='AUC',
    learning_rate=0.08,
    boosting_type='Ordered',
    bootstrap_type='Bernoulli',
    subsample=0.8,
    one_hot_max_size=10,
    leaf_estimation_iterations=10,
    max_ctr_complexity=4,
)
estimator_cb = CatBoostClassifier(**cb_params)

# Same feature-engineering settings for the train and test frames.
fe_kwargs = dict(level=2, max_feats=100, num_columns=num_columns)
data = cat_prep(normed_fe_interaction(features[good_columns], **fe_kwargs), cat_columns)
data_test = cat_prep(normed_fe_interaction(test_data[good_columns], **fe_kwargs), cat_columns)

estimator_cb.fit(data, labels, cat_features=cat_columns, verbose=10, plot=True)

probs = estimator_cb.predict_proba(data_test)
write_to_submission_file(probs[:, 1], out_file='submission_cb_new_fe1.csv')
0:	learn: 0.5667429	total: 162ms	remaining: 1m 1s
10:	learn: 0.5745089	total: 1.45s	remaining: 48.7s
20:	learn: 0.5799120	total: 2.68s	remaining: 45.8s
30:	learn: 0.6539477	total: 3.96s	remaining: 44.6s
40:	learn: 0.7038001	total: 5.4s	remaining: 44.6s
50:	learn: 0.7191750	total: 6.86s	remaining: 44.2s
60:	learn: 0.7268145	total: 8.26s	remaining: 43.2s
70:	learn: 0.7331904	total: 9.61s	remaining: 41.8s
80:	learn: 0.7376558	total: 11s	remaining: 40.6s
90:	learn: 0.7395579	total: 12.3s	remaining: 39.1s
100:	learn: 0.7415998	total: 13.7s	remaining: 37.9s
110:	learn: 0.7445791	total: 15.1s	remaining: 36.7s
120:	learn: 0.7454122	total: 16.5s	remaining: 35.3s
130:	learn: 0.7485990	total: 17.9s	remaining: 34s
140:	learn: 0.7501873	total: 19.2s	remaining: 32.6s
150:	learn: 0.7514794	total: 20.6s	remaining: 31.2s
160:	learn: 0.7535968	total: 21.9s	remaining: 29.8s
170:	learn: 0.7547266	total: 23.2s	remaining: 28.4s
180:	learn: 0.7555001	total: 24.6s	remaining: 27s
190:	learn: 0.7571546	total: 25.9s	remaining: 25.6s
200:	learn: 0.7584991	total: 27.2s	remaining: 24.2s
210:	learn: 0.7594814	total: 28.5s	remaining: 22.8s
220:	learn: 0.7600859	total: 29.7s	remaining: 21.4s
230:	learn: 0.7611797	total: 31.1s	remaining: 20s
240:	learn: 0.7620281	total: 32.4s	remaining: 18.7s
250:	learn: 0.7629648	total: 33.7s	remaining: 17.3s
260:	learn: 0.7644175	total: 35s	remaining: 16s
270:	learn: 0.7657898	total: 36.4s	remaining: 14.6s
280:	learn: 0.7661034	total: 37.6s	remaining: 13.3s
290:	learn: 0.7672591	total: 39s	remaining: 11.9s
300:	learn: 0.7674893	total: 40.2s	remaining: 10.5s
310:	learn: 0.7680266	total: 41.4s	remaining: 9.18s
320:	learn: 0.7686199	total: 42.7s	remaining: 7.84s
330:	learn: 0.7689689	total: 43.9s	remaining: 6.5s
340:	learn: 0.7697934	total: 45.2s	remaining: 5.17s
350:	learn: 0.7700800	total: 46.4s	remaining: 3.83s
360:	learn: 0.7710895	total: 47.7s	remaining: 2.51s
370:	learn: 0.7717130	total: 49s	remaining: 1.19s
379:	learn: 0.7720754	total: 50.1s	remaining: 0us

image.png

In [ ]:
# CatBoost with 3-column (level=3) interaction features, fitted on
# `features`/`labels` and used to produce a submission.
cb_params = dict(
    task_type='GPU',
    iterations=380,
    random_state=0,
    eval_metric='AUC',
    learning_rate=0.08,
    boosting_type='Ordered',
    bootstrap_type='Bernoulli',
    subsample=0.8,
    one_hot_max_size=10,
    leaf_estimation_iterations=10,
    max_ctr_complexity=4,
)
estimator_cb = CatBoostClassifier(**cb_params)

# Same feature-engineering settings for the train and test frames.
fe_kwargs = dict(level=3, max_feats=100, num_columns=num_columns)
data = cat_prep(normed_fe_interaction(features[good_columns], **fe_kwargs), cat_columns)
data_test = cat_prep(normed_fe_interaction(test_data[good_columns], **fe_kwargs), cat_columns)

estimator_cb.fit(data, labels, cat_features=cat_columns, verbose=10, plot=True)

probs = estimator_cb.predict_proba(data_test)
write_to_submission_file(probs[:, 1], out_file='submission_cb_new_fe2.csv')
0:	learn: 0.5667429	total: 152ms	remaining: 57.7s
10:	learn: 0.5745089	total: 1.53s	remaining: 51.5s
20:	learn: 0.5799120	total: 2.76s	remaining: 47.2s
30:	learn: 0.6505364	total: 4.03s	remaining: 45.4s
40:	learn: 0.6949507	total: 5.38s	remaining: 44.5s
50:	learn: 0.7112531	total: 6.8s	remaining: 43.9s
60:	learn: 0.7240741	total: 8.24s	remaining: 43.1s
70:	learn: 0.7330008	total: 9.66s	remaining: 42s
80:	learn: 0.7378567	total: 11s	remaining: 40.6s
90:	learn: 0.7446229	total: 12.4s	remaining: 39.5s
100:	learn: 0.7470711	total: 13.8s	remaining: 38.1s
110:	learn: 0.7487881	total: 15.2s	remaining: 36.9s
120:	learn: 0.7512021	total: 16.6s	remaining: 35.6s
130:	learn: 0.7531900	total: 17.9s	remaining: 34.1s
140:	learn: 0.7548328	total: 19.3s	remaining: 32.7s
150:	learn: 0.7558930	total: 20.6s	remaining: 31.3s
160:	learn: 0.7572969	total: 22s	remaining: 29.9s
170:	learn: 0.7581705	total: 23.3s	remaining: 28.5s
180:	learn: 0.7590770	total: 24.6s	remaining: 27.1s
190:	learn: 0.7596712	total: 26s	remaining: 25.7s
200:	learn: 0.7606993	total: 27.3s	remaining: 24.4s
210:	learn: 0.7619490	total: 28.7s	remaining: 23s
220:	learn: 0.7624644	total: 30s	remaining: 21.6s
230:	learn: 0.7630318	total: 31.2s	remaining: 20.2s
240:	learn: 0.7635603	total: 32.5s	remaining: 18.8s
250:	learn: 0.7641382	total: 33.8s	remaining: 17.4s
260:	learn: 0.7650056	total: 35.2s	remaining: 16s
270:	learn: 0.7659675	total: 36.5s	remaining: 14.7s
280:	learn: 0.7665012	total: 37.8s	remaining: 13.3s
290:	learn: 0.7673984	total: 39.1s	remaining: 11.9s
300:	learn: 0.7676499	total: 40.3s	remaining: 10.6s
310:	learn: 0.7685723	total: 41.7s	remaining: 9.24s
320:	learn: 0.7689739	total: 42.9s	remaining: 7.89s
330:	learn: 0.7695994	total: 44.2s	remaining: 6.54s
340:	learn: 0.7702506	total: 45.5s	remaining: 5.2s
350:	learn: 0.7706053	total: 46.7s	remaining: 3.86s
360:	learn: 0.7718281	total: 48.1s	remaining: 2.53s
370:	learn: 0.7718674	total: 49.4s	remaining: 1.2s
379:	learn: 0.7725214	total: 50.6s	remaining: 0us

image.png

In [ ]:
class OverSamplerEstimator2(BaseEstimator, TransformerMixin):
    """Classifier wrapper: preprocess, oversample (training only), classify.

    ``fit`` fits ``transformer`` on the raw data, transforms it, resamples the
    transformed data with ``oversampler.fit_resample`` and trains
    ``classifier`` on the resampled set. ``predict`` / ``predict_proba`` only
    apply the fitted transformer before delegating to the classifier, so
    prediction inputs are never resampled.

    Args:
        transformer: object with ``fit(X, y)`` and ``transform(X)``.
        oversampler: object with ``fit_resample(X, y)``.
        classifier: object with ``fit``, ``predict`` and ``predict_proba``.
    """

    def __init__(self, transformer,oversampler,classifier):
        self.transformer = transformer
        self.oversampler = oversampler
        self.classifier = classifier

    def fit(self, X, y):
        """Fit the transformer, oversample the transformed data, train the classifier.

        Returns:
            self, so that a chained ``est.fit(X, y).predict(X_raw)`` still goes
            through the transformer. Previously the inner classifier was
            returned, which made chained calls skip the preprocessing step.
        """
        self.transformer.fit(X,y)
        X_tr = self.transformer.transform(X)
        X_resampled, y_resampled = self.oversampler.fit_resample(X_tr,y)
        self.classifier.fit(X_resampled, y_resampled)
        return self

    def predict(self, X):
        """Return class predictions for raw (untransformed) ``X``."""
        X_tr = self.transformer.transform(X)
        return self.classifier.predict(X_tr)

    def predict_proba(self,X):
        """Return class probabilities for raw (untransformed) ``X``."""
        X_tr = self.transformer.transform(X)
        return self.classifier.predict_proba(X_tr)
In [ ]:
# SMOTE oversampling followed by XGBoost, fitted on `features`/`labels`.
# random_state is fixed so that the synthetic samples, and therefore the
# submission file, are reproducible, like the other oversamplers in this
# notebook, which all pass random_state=0.
oversampler_1 = over_sampling.SMOTE(random_state=0)

clf_res = xgb.XGBClassifier(random_state=42,n_estimators=100,gamma=0.1,max_depth=3,reg_alpha=1,min_child_weight=1)
estimator = OverSamplerEstimator2(transformer,oversampler_1,clf_res)

estimator.fit(features.values, labels.values)
probs = estimator.predict_proba(test_data.values)
# Column 1 of predict_proba holds the probability of the second class.
write_to_submission_file(probs[:,1],out_file='submission_overs1.csv')
In [ ]:
# Random oversampling (sampling_strategy=0.5) followed by XGBoost,
# fitted on `features`/`labels`.
oversampler_2 = over_sampling.RandomOverSampler(sampling_strategy=0.5, random_state=0)

xgb_params = dict(
    random_state=42,
    n_estimators=100,
    gamma=0.1,
    max_depth=3,
    reg_alpha=1,
    min_child_weight=1,
)
clf_res = xgb.XGBClassifier(**xgb_params)
estimator = OverSamplerEstimator2(
    transformer=transformer,
    oversampler=oversampler_2,
    classifier=clf_res,
)

estimator.fit(features.values, labels.values)
probs = estimator.predict_proba(test_data.values)
write_to_submission_file(probs[:, 1], out_file='submission_overs2.csv')
In [ ]:
%%time
# Add pairwise (level=2) interaction features to the train and test frames.
fe_kwargs = dict(level=2, max_feats=100, num_columns=num_columns)
data_train = normed_fe_interaction(features, **fe_kwargs)
data_test = normed_fe_interaction(test_data, **fe_kwargs)

# params = [prop_nan, max_prop_unique, N_lim]
indices_lists = data_preprocessor(data_train, params=[0.7, 0.5, 30])

# Numeric branch: SimpleImputer fills with mean values by default; no scaling.
numeric_imputer = impute.SimpleImputer()
numeric_scaler = None

# Fraction of a category's size relative to the median value for the feature;
# used for subsequently merging such categories into one.
occurrence_med_prop = 0.1
merger_kwargs = dict(occurrence_med_prop=occurrence_med_prop, fill_value='Hi')
cat_imputer1 = MyCatMergerNaInputer(**merger_kwargs)
cat_imputer2 = MyCatMergerNaInputer(**merger_kwargs)

# Both categorical groups are one-hot encoded; unseen categories are ignored.
low_cardinality_cat_transformer1 = preprocessing.OneHotEncoder(handle_unknown='ignore')
high_cardinality_cat_transformer1 = preprocessing.OneHotEncoder(handle_unknown='ignore')

low_level_transformers = [
    numeric_imputer,
    numeric_scaler,
    cat_imputer1,
    cat_imputer2,
    low_cardinality_cat_transformer1,
    high_cardinality_cat_transformer1,
]

transformer = transformation(low_level_transformers, indices_lists)

# SMOTE (sampling_strategy=0.2) followed by LightGBM on the transformed data.
oversampler_3 = over_sampling.SMOTE(sampling_strategy=0.2, random_state=0)
clf_res = lgb.LGBMClassifier(random_state=42, n_estimators=100, max_depth=3)
estimator = OverSamplerEstimator2(
    transformer=transformer,
    oversampler=oversampler_3,
    classifier=clf_res,
)

estimator.fit(data_train.values, labels.values)
probs = estimator.predict_proba(data_test.values)
write_to_submission_file(probs[:, 1], out_file='submission_overs3_lgb.csv')
CPU times: user 18.9 s, sys: 288 ms, total: 19.2 s
Wall time: 19.3 s
In [ ]:
# Random oversampling (sampling_strategy=0.2) followed by LightGBM; reuses the
# `transformer`, `data_train` and `data_test` objects already in scope.
oversampler_4 = over_sampling.RandomOverSampler(sampling_strategy=0.2, random_state=0)

lgb_params = dict(random_state=42, n_estimators=100, max_depth=3)
clf_res = lgb.LGBMClassifier(**lgb_params)
estimator = OverSamplerEstimator2(
    transformer=transformer,
    oversampler=oversampler_4,
    classifier=clf_res,
)

estimator.fit(data_train.values, labels.values)
probs = estimator.predict_proba(data_test.values)
write_to_submission_file(probs[:, 1], out_file='submission_overs4_lgb.csv')

image.png

In [ ]:
# Load the listed submission files and place them side by side, one column
# per submission, indexed by the first CSV column.
sub_names = [
    'submission_cb1', 'submission_cb2', 'submission_cb3', 'submission_cb4',
    'submission_cb5', 'submission_cb6', 'submission_cb7', 'submission_cb8',
    'submission3', 'submission4', 'submission5', 'submission6', 'submission7',
    'submission8', 'submission9', 'submission_final',
    'submission_cb9', 'submission_cb_new_fe1', 'submission_cb_new_fe2',
    'submission_lgb1', 'submission_lgb2', 'submission_mean1',
    'submission_overs1', 'submission_overs2', 'submission_overs3_lgb',
    'submission_overs4_lgb',
]
submission_list = [
    pd.read_csv(f'{submission_name}.csv', index_col=0)
    for submission_name in sub_names
]
sub_df = pd.concat(submission_list, axis=1)
sub_df.head()
Out[ ]:
result result result result result result result result result result result result result result result result result result result result result result result result result result
Id
0 0.092013 0.080946 0.081988 0.081819 0.082297 0.245850 0.063512 0.067114 0.064954 0.033749 0.466653 0.263725 0.106894 0.063221 0.060926 0.060926 0.079809 0.090643 0.089395 0.060676 0.070488 0.119787 0.072265 0.273081 0.123748 0.132329
1 0.108802 0.112910 0.111290 0.096106 0.098092 0.282603 0.112622 0.093682 0.117818 0.056393 0.594583 0.411401 0.098931 0.105554 0.108696 0.108696 0.098490 0.098212 0.099714 0.114274 0.219322 0.163636 0.121741 0.453040 0.112047 0.265237
2 0.019675 0.020165 0.020478 0.021503 0.020924 0.070057 0.024609 0.026490 0.028061 0.012662 0.240480 0.142153 0.033888 0.023810 0.025996 0.025996 0.028087 0.030480 0.020855 0.025860 0.020843 0.047309 0.029609 0.144720 0.023697 0.069271
3 0.060231 0.066768 0.060720 0.053580 0.068739 0.212873 0.077320 0.082234 0.078644 0.044644 0.510958 0.326529 0.126811 0.083559 0.082151 0.082151 0.066261 0.030072 0.041639 0.084941 0.082500 0.126120 0.090625 0.355757 0.070562 0.126819
4 0.027637 0.022772 0.023751 0.024252 0.026784 0.092238 0.029508 0.034319 0.021621 0.012318 0.152059 0.101077 0.032756 0.021486 0.023285 0.023285 0.037520 0.030243 0.027730 0.021737 0.012308 0.041822 0.033255 0.125286 0.028443 0.056662
In [ ]:
# Blend: add the row-wise average of all loaded submission columns as a new
# 'mean' column, then preview the first rows.
sub_df['mean'] = sub_df.mean(axis=1)
sub_df.head()
Out[ ]:
result result result result result result result result result result result result result result result result result result result result result result result result result result mean
Id
0 0.092013 0.080946 0.081988 0.081819 0.082297 0.245850 0.063512 0.067114 0.064954 0.033749 0.466653 0.263725 0.106894 0.063221 0.060926 0.060926 0.079809 0.090643 0.089395 0.060676 0.070488 0.119787 0.072265 0.273081 0.123748 0.132329 0.116493
1 0.108802 0.112910 0.111290 0.096106 0.098092 0.282603 0.112622 0.093682 0.117818 0.056393 0.594583 0.411401 0.098931 0.105554 0.108696 0.108696 0.098490 0.098212 0.099714 0.114274 0.219322 0.163636 0.121741 0.453040 0.112047 0.265237 0.167842
2 0.019675 0.020165 0.020478 0.021503 0.020924 0.070057 0.024609 0.026490 0.028061 0.012662 0.240480 0.142153 0.033888 0.023810 0.025996 0.025996 0.028087 0.030480 0.020855 0.025860 0.020843 0.047309 0.029609 0.144720 0.023697 0.069271 0.046064
3 0.060231 0.066768 0.060720 0.053580 0.068739 0.212873 0.077320 0.082234 0.078644 0.044644 0.510958 0.326529 0.126811 0.083559 0.082151 0.082151 0.066261 0.030072 0.041639 0.084941 0.082500 0.126120 0.090625 0.355757 0.070562 0.126819 0.118970
4 0.027637 0.022772 0.023751 0.024252 0.026784 0.092238 0.029508 0.034319 0.021621 0.012318 0.152059 0.101077 0.032756 0.021486 0.023285 0.023285 0.037520 0.030243 0.027730 0.021737 0.012308 0.041822 0.033255 0.125286 0.028443 0.056662 0.041698
In [ ]:
# Save the blended 'mean' column as a new submission.
# NOTE(review): sub_df[['mean']].values is a 2-D (n, 1) array, while the model
# cells pass a 1-D probs[:,1]; confirm write_to_submission_file accepts both.
write_to_submission_file(sub_df[['mean']].values,out_file='submission_mean2.csv')

image.png

In [ ]:
# Blend a subset of the submissions: load them, average row-wise, save the
# result as submission_mean3.csv.
sub_names = [
    'submission_cb1', 'submission_cb2', 'submission_cb3', 'submission_cb4',
    'submission_cb5', 'submission_cb6', 'submission_cb7',
    'submission3', 'submission4', 'submission9', 'submission_final',
    'submission_cb9', 'submission_cb_new_fe1', 'submission_cb_new_fe2',
    'submission_lgb1', 'submission_lgb2', 'submission_mean1',
    'submission_overs2', 'submission_mean2',
]
submission_list = [
    pd.read_csv(f'{submission_name}.csv', index_col=0)
    for submission_name in sub_names
]
sub_df = pd.concat(submission_list, axis=1)
sub_df['mean'] = sub_df.mean(axis=1)
write_to_submission_file(sub_df[['mean']].values, out_file='submission_mean3.csv')

image.png

In [ ]:
# Blend of selected CatBoost submissions and earlier blends: load them,
# average row-wise, save the result as submission_mean4.csv.
sub_names = [
    'submission_cb2', 'submission_cb3', 'submission_cb5', 'submission_cb6',
    'submission_cb_new_fe1', 'submission_cb_new_fe2',
    'submission_mean1', 'submission_mean2', 'submission_mean3',
]
submission_list = [
    pd.read_csv(f'{submission_name}.csv', index_col=0)
    for submission_name in sub_names
]
sub_df = pd.concat(submission_list, axis=1)
sub_df['mean'] = sub_df.mean(axis=1)
write_to_submission_file(sub_df[['mean']].values, out_file='submission_mean4.csv')

image.png

In [ ]:
# Blend of one CatBoost submission with earlier blends: load them, average
# row-wise, save the result as submission_mean6.csv.
# NOTE(review): submission_mean5.csv is not written by any cell in this part
# of the notebook; confirm the file exists before running.
sub_names = [
    'submission_cb_new_fe1',
    'submission_mean3',
    'submission_mean4',
    'submission_mean5',
]
submission_list = [
    pd.read_csv(f'{submission_name}.csv', index_col=0)
    for submission_name in sub_names
]
sub_df = pd.concat(submission_list, axis=1)
sub_df['mean'] = sub_df.mean(axis=1)
write_to_submission_file(sub_df[['mean']].values, out_file='submission_mean6.csv')

image.png

In [ ]:
# Blend of one CatBoost submission with earlier blends: load them, average
# row-wise, save the result as submission_mean8.csv.
# NOTE(review): submission_mean5.csv and submission_mean7.csv are not written
# by any cell in this part of the notebook; confirm the files exist.
sub_names = [
    'submission_cb_new_fe1',
    'submission_mean3',
    'submission_mean4',
    'submission_mean5',
    'submission_mean6',
    'submission_mean7',
]
submission_list = [
    pd.read_csv(f'{submission_name}.csv', index_col=0)
    for submission_name in sub_names
]
sub_df = pd.concat(submission_list, axis=1)
sub_df['mean'] = sub_df.mean(axis=1)
write_to_submission_file(sub_df[['mean']].values, out_file='submission_mean8.csv')

image.png

In [ ]:
# Build the training frame with pairwise (level=2) interaction features and
# pass it through cat_prep, then hand it to null_map
# (presumably a missing-value overview; verify against its definition).
data0 = cat_prep(normed_fe_interaction(features[good_columns],level=2, max_feats = 100,num_columns=num_columns),cat_columns)
null_map(data0)
In [ ]:
# Replace every missing value with the sentinel string 'NANANA' before
# resampling (presumably because the oversampler does not accept NaN; TODO
# confirm). The sentinel is turned back into NaN after resampling.
data0_nafilled = data0.fillna('NANANA')
null_map(data0_nafilled)
In [ ]:
# Randomly oversample the sentinel-filled training frame with
# sampling_strategy=0.15 and a fixed seed; returns the resampled data and labels.
oversampler_5 = over_sampling.RandomOverSampler(sampling_strategy=0.15,random_state=0)
data10,labels1 = oversampler_5.fit_resample(data0_nafilled,labels)
In [ ]:
# Wrap the resampled data in a DataFrame carrying the original column names,
# and continue on a copy so data11 stays untouched.
data11 = pd.DataFrame(data10,columns = data0.columns)
data12 = data11.copy()
data12.head()
Out[ ]:
Var6 Var13 Var21 Var22 Var24 Var25 Var28 Var38 Var57 Var73 Var74 Var76 Var81 Var83 Var85 Var94 Var109 Var112 Var113 Var119 Var123 Var125 Var126 Var133 Var134 Var140 Var149 Var153 Var160 Var163 Var189 Var7 Var35 Var44 Var65 Var72 Var78 Var132 Var143 Var144 ... Var21,Var24_mix Var21,Var25_mix Var21,Var28_mix Var21,Var38_mix Var21,Var57_mix Var21,Var73_mix Var21,Var74_mix Var21,Var76_mix Var21,Var81_mix Var21,Var83_mix Var21,Var85_mix Var21,Var94_mix Var21,Var109_mix Var21,Var112_mix Var21,Var113_mix Var21,Var119_mix Var21,Var123_mix Var21,Var125_mix Var21,Var126_mix Var21,Var133_mix Var21,Var134_mix Var21,Var140_mix Var21,Var149_mix Var21,Var153_mix Var21,Var160_mix Var21,Var163_mix Var21,Var189_mix Var22,Var24_mix Var22,Var25_mix Var22,Var28_mix Var22,Var38_mix Var22,Var57_mix Var22,Var73_mix Var22,Var74_mix Var22,Var76_mix Var22,Var81_mix Var22,Var83_mix Var22,Var85_mix Var22,Var94_mix Var22,Var109_mix
0 3052 NANANA 480 600 20 480 200 82752 2.90793 34 NANANA 716008 14599.9 5 32 NANANA 144 144 -1.20996e+06 1660 66 NANANA 4 326915 604276 NANANA 389396 2.31389e+06 28 599532 NANANA nan 0.0 0.0 nan nan 0.0 0.0 0.0 9.0 ... 0.685776 0.796489 -0.109136 -0.364475 -0.129663 -0.270649 NANANA -0.184657 -0.363179 -0.0760304 0.498122 NANANA 0.257064 0.215867 -0.609794 0.158686 0.012613 NANANA 0.0893385 -0.352355 0.120705 NANANA 0.059532 -0.391855 -0.0483159 0.0592706 NANANA 0.696173 0.808564 -0.110791 -0.370001 -0.131629 -0.274752 NANANA -0.187457 -0.368685 -0.0771831 0.505674 NANANA 0.260961
1 1813 636 212 265 2 128 166.56 2.70612e+06 5.87033 128 0 1.66113e+06 67529.1 25 10 32289 80 72 417932 1025 66 24912 40 1.93446e+06 349568 205 735 6.50268e+06 14 364182 276 7.0 0.0 0.0 27.0 3.0 0.0 0.0 0.0 18.0 ... 0.00987057 -0.00586096 0.0230473 -0.00170797 -0.0455638 -0.0454131 0.0101156 -0.00365552 0.0130161 -0.00234647 -0.0029006 0.0148894 -0.00526304 -0.00142605 -0.0293758 -0.002169 -0.00112108 0.00131983 -0.0704942 0.00561291 0.00570884 0.0110264 0.0172522 -0.00284025 0.00988391 0.00564729 -0.00244105 0.00878584 -0.00521686 0.0205145 -0.00152027 -0.0405565 -0.0404224 0.00900392 -0.00325379 0.0115857 -0.0020886 -0.00258184 0.0132531 -0.00468466
2 1953 448 176 220 0 72 311.76 4.69878e+06 5.98163 166 245 3.02515e+06 85266 35 0 53388 40 48 -124655 590 78 7218 36 3.14841e+06 1.08621e+06 400 0 1.0569e+07 18 0 NANANA 7.0 0.0 0.0 18.0 3.0 0.0 0.0 0.0 27.0 ... 0.0469332 0.0118242 -0.0923496 -0.073052 -0.126219 -0.194444 -0.0379445 -0.0862971 0.0173566 -0.0182511 0.0420806 0.0267564 0.015154 0.0118901 -0.00401953 0.015893 -0.00868095 0.0231717 -0.168113 -0.0365064 -0.110666 0.0243275 0.0457555 -0.104261 0.0219241 0.0596268 NANANA 0.0451282 0.0113695 -0.088798 -0.0702426 -0.121365 -0.186966 -0.0364852 -0.0829783 0.0166891 -0.0175492 0.0404623 0.0257274 0.0145712
3 1533 4 332 415 0 144 220.08 864384 5.1081 30 0 2.64224e+06 74107.2 10 2 NANANA 32 32 378474 1435 24 693 NANANA 7.0667e+06 650390 5 0 9.6762e+06 108 253284 NANANA 7.0 5.0 0.0 9.0 nan 0.0 8.0 0.0 0.0 ... -0.0795709 0.0395275 -0.00756144 -0.0997307 0.138486 -0.121161 -0.0453725 0.10984 -0.0476436 -0.0201024 -0.054473 NANANA -0.0355521 -0.0378618 0.122676 0.0442952 -0.0288838 -0.0515896 NANANA 0.342929 0.0614706 -0.0578811 -0.0775743 0.14075 0.124 -0.0484006 NANANA -0.0820024 0.0407354 -0.0077925 -0.102778 0.142718 -0.124863 -0.0467589 0.113196 -0.0490995 -0.0207167 -0.0561375 NANANA -0.0366385
4 686 0 160 200 2 48 278 4.36488e+06 0.650716 32 0 1440 171073 25 12 106455 32 8 142602 490 60 468 -28 3.79446e+06 642816 225 554414 1.05352e+07 24 2.85128e+06 NANANA 7.0 0.0 0.0 9.0 3.0 0.0 0.0 0.0 9.0 ... 0.033333 0.030044 -0.0723757 -0.0785889 0.185816 0.0862291 0.0341604 0.106666 -0.0833115 -0.00792406 -0.0224972 -0.00641186 0.0267667 0.0484986 -0.0514668 0.0266341 -0.000138462 0.0391606 0.160957 -0.0814855 -0.0446298 0.036602 -0.0501923 -0.132056 0.0198955 -0.37047 NANANA 0.0323645 0.029171 -0.0702727 -0.0763054 0.180416 0.0837235 0.0331678 0.103566 -0.0808907 -0.00769381 -0.0218435 -0.00622556 0.025989

5 rows × 172 columns

In [ ]:
# Turn the 'NANANA' placeholder back into real missing values, column by column.
# Boolean-mask assignment goes through .loc: .at is documented for access to a
# single row/column label pair, and newer pandas versions may reject a mask.
for col in data12.columns:
  data12.loc[data12[col] == 'NANANA', col] = np.nan
data12.head()
Out[ ]:
Var6 Var13 Var21 Var22 Var24 Var25 Var28 Var38 Var57 Var73 Var74 Var76 Var81 Var83 Var85 Var94 Var109 Var112 Var113 Var119 Var123 Var125 Var126 Var133 Var134 Var140 Var149 Var153 Var160 Var163 Var189 Var7 Var35 Var44 Var65 Var72 Var78 Var132 Var143 Var144 ... Var21,Var24_mix Var21,Var25_mix Var21,Var28_mix Var21,Var38_mix Var21,Var57_mix Var21,Var73_mix Var21,Var74_mix Var21,Var76_mix Var21,Var81_mix Var21,Var83_mix Var21,Var85_mix Var21,Var94_mix Var21,Var109_mix Var21,Var112_mix Var21,Var113_mix Var21,Var119_mix Var21,Var123_mix Var21,Var125_mix Var21,Var126_mix Var21,Var133_mix Var21,Var134_mix Var21,Var140_mix Var21,Var149_mix Var21,Var153_mix Var21,Var160_mix Var21,Var163_mix Var21,Var189_mix Var22,Var24_mix Var22,Var25_mix Var22,Var28_mix Var22,Var38_mix Var22,Var57_mix Var22,Var73_mix Var22,Var74_mix Var22,Var76_mix Var22,Var81_mix Var22,Var83_mix Var22,Var85_mix Var22,Var94_mix Var22,Var109_mix
0 3052 NaN 480 600 20 480 200 82752 2.90793 34 NaN 716008 14599.9 5 32 NaN 144 144 -1.20996e+06 1660 66 NaN 4 326915 604276 NaN 389396 2.31389e+06 28 599532 NaN nan 0.0 0.0 nan nan 0.0 0.0 0.0 9.0 ... 0.685776 0.796489 -0.109136 -0.364475 -0.129663 -0.270649 NaN -0.184657 -0.363179 -0.0760304 0.498122 NaN 0.257064 0.215867 -0.609794 0.158686 0.012613 NaN 0.0893385 -0.352355 0.120705 NaN 0.059532 -0.391855 -0.0483159 0.0592706 NaN 0.696173 0.808564 -0.110791 -0.370001 -0.131629 -0.274752 NaN -0.187457 -0.368685 -0.0771831 0.505674 NaN 0.260961
1 1813 636 212 265 2 128 166.56 2.70612e+06 5.87033 128 0 1.66113e+06 67529.1 25 10 32289 80 72 417932 1025 66 24912 40 1.93446e+06 349568 205 735 6.50268e+06 14 364182 276 7.0 0.0 0.0 27.0 3.0 0.0 0.0 0.0 18.0 ... 0.00987057 -0.00586096 0.0230473 -0.00170797 -0.0455638 -0.0454131 0.0101156 -0.00365552 0.0130161 -0.00234647 -0.0029006 0.0148894 -0.00526304 -0.00142605 -0.0293758 -0.002169 -0.00112108 0.00131983 -0.0704942 0.00561291 0.00570884 0.0110264 0.0172522 -0.00284025 0.00988391 0.00564729 -0.00244105 0.00878584 -0.00521686 0.0205145 -0.00152027 -0.0405565 -0.0404224 0.00900392 -0.00325379 0.0115857 -0.0020886 -0.00258184 0.0132531 -0.00468466
2 1953 448 176 220 0 72 311.76 4.69878e+06 5.98163 166 245 3.02515e+06 85266 35 0 53388 40 48 -124655 590 78 7218 36 3.14841e+06 1.08621e+06 400 0 1.0569e+07 18 0 NaN 7.0 0.0 0.0 18.0 3.0 0.0 0.0 0.0 27.0 ... 0.0469332 0.0118242 -0.0923496 -0.073052 -0.126219 -0.194444 -0.0379445 -0.0862971 0.0173566 -0.0182511 0.0420806 0.0267564 0.015154 0.0118901 -0.00401953 0.015893 -0.00868095 0.0231717 -0.168113 -0.0365064 -0.110666 0.0243275 0.0457555 -0.104261 0.0219241 0.0596268 NaN 0.0451282 0.0113695 -0.088798 -0.0702426 -0.121365 -0.186966 -0.0364852 -0.0829783 0.0166891 -0.0175492 0.0404623 0.0257274 0.0145712
3 1533 4 332 415 0 144 220.08 864384 5.1081 30 0 2.64224e+06 74107.2 10 2 NaN 32 32 378474 1435 24 693 NaN 7.0667e+06 650390 5 0 9.6762e+06 108 253284 NaN 7.0 5.0 0.0 9.0 nan 0.0 8.0 0.0 0.0 ... -0.0795709 0.0395275 -0.00756144 -0.0997307 0.138486 -0.121161 -0.0453725 0.10984 -0.0476436 -0.0201024 -0.054473 NaN -0.0355521 -0.0378618 0.122676 0.0442952 -0.0288838 -0.0515896 NaN 0.342929 0.0614706 -0.0578811 -0.0775743 0.14075 0.124 -0.0484006 NaN -0.0820024 0.0407354 -0.0077925 -0.102778 0.142718 -0.124863 -0.0467589 0.113196 -0.0490995 -0.0207167 -0.0561375 NaN -0.0366385
4 686 0 160 200 2 48 278 4.36488e+06 0.650716 32 0 1440 171073 25 12 106455 32 8 142602 490 60 468 -28 3.79446e+06 642816 225 554414 1.05352e+07 24 2.85128e+06 NaN 7.0 0.0 0.0 9.0 3.0 0.0 0.0 0.0 9.0 ... 0.033333 0.030044 -0.0723757 -0.0785889 0.185816 0.0862291 0.0341604 0.106666 -0.0833115 -0.00792406 -0.0224972 -0.00641186 0.0267667 0.0484986 -0.0514668 0.0266341 -0.000138462 0.0391606 0.160957 -0.0814855 -0.0446298 0.036602 -0.0501923 -0.132056 0.0198955 -0.37047 NaN 0.0323645 0.029171 -0.0702727 -0.0763054 0.180416 0.0837235 0.0331678 0.103566 -0.0808907 -0.00769381 -0.0218435 -0.00622556 0.025989

5 rows × 172 columns

In [ ]:
# Inspect the engineered training frame `data12` for missing values.
# NOTE(review): `null_map` is defined earlier in the notebook; presumably it
# visualises the NaN pattern of the frame -- confirm against its definition.
null_map(data12)
In [ ]:
# Show the (rows, columns) dimensions of `data12` as the cell output.
data12.shape
Out[ ]:
(42577, 172)
In [ ]:
# Build the competition test matrix: interaction features first
# (level=2, at most 100 features), then categorical preparation.
# NOTE(review): presumably these settings mirror the ones used to build
# `data12`, so train and test columns line up -- confirm.
_test_interactions = normed_fe_interaction(
    test_data[good_columns],
    level=2,
    max_feats=100,
    num_columns=num_columns,
)
data_test = cat_prep(_test_interactions, cat_columns)
In [ ]:
# Final fit on `data12` / `labels1` followed by a submission export.
# NOTE(review): per the output file name this is presumably the resampled
# (oversampled) training set -- confirm where `data12`/`labels1` are built.

# Work on a copy so the fit cannot modify `data12` itself.
data1 = data12.copy()

# CatBoost configuration for this run.
_cb_params = {
    'task_type': 'GPU',
    'iterations': 380,
    'random_state': 0,
    'eval_metric': 'AUC',
    'learning_rate': 0.08,
    'boosting_type': 'Ordered',
    'bootstrap_type': 'Bernoulli',
    'subsample': 0.8,
    'one_hot_max_size': 10,
    'leaf_estimation_iterations': 10,
    'max_ctr_complexity': 4,
}
estimator_cb = CatBoostClassifier(**_cb_params)

# No eval_set is passed, so all 380 iterations are kept and the log only
# reports the metric on the training data.
estimator_cb.fit(
    data1,
    labels1,
    cat_features=cat_columns,
    verbose=10,
    plot=True,
)

# Score the test matrix and export the second probability column
# (the column for label `1` when the labels are -1/1).
probs = estimator_cb.predict_proba(data_test)
_positive_scores = probs[:, 1]
write_to_submission_file(
    _positive_scores,
    out_file='submission_cb_new_fe1_resampled.csv',
)
0:	learn: 0.8960906	total: 104ms	remaining: 39.3s
10:	learn: 0.9041462	total: 1.04s	remaining: 34.8s
20:	learn: 0.9050800	total: 1.98s	remaining: 33.8s
30:	learn: 0.9151685	total: 2.9s	remaining: 32.7s
40:	learn: 0.9262251	total: 3.87s	remaining: 32s
50:	learn: 0.9350544	total: 4.87s	remaining: 31.4s
60:	learn: 0.9382568	total: 5.88s	remaining: 30.8s
70:	learn: 0.9401846	total: 6.86s	remaining: 29.8s
80:	learn: 0.9415807	total: 7.83s	remaining: 28.9s
90:	learn: 0.9431829	total: 8.81s	remaining: 28s
100:	learn: 0.9446268	total: 9.84s	remaining: 27.2s
110:	learn: 0.9454206	total: 10.8s	remaining: 26.3s
120:	learn: 0.9466122	total: 11.9s	remaining: 25.4s
130:	learn: 0.9470798	total: 12.8s	remaining: 24.4s
140:	learn: 0.9477571	total: 13.8s	remaining: 23.4s
150:	learn: 0.9480209	total: 14.7s	remaining: 22.3s
160:	learn: 0.9485584	total: 15.7s	remaining: 21.4s
170:	learn: 0.9487638	total: 16.7s	remaining: 20.4s
180:	learn: 0.9490836	total: 17.6s	remaining: 19.3s
190:	learn: 0.9497980	total: 18.6s	remaining: 18.4s
200:	learn: 0.9501020	total: 19.5s	remaining: 17.4s
210:	learn: 0.9504761	total: 20.4s	remaining: 16.4s
220:	learn: 0.9507857	total: 21.4s	remaining: 15.4s
230:	learn: 0.9510603	total: 22.3s	remaining: 14.4s
240:	learn: 0.9513265	total: 23.3s	remaining: 13.4s
250:	learn: 0.9517757	total: 24.2s	remaining: 12.5s
260:	learn: 0.9523525	total: 25.2s	remaining: 11.5s
270:	learn: 0.9527064	total: 26.2s	remaining: 10.5s
280:	learn: 0.9529427	total: 27.1s	remaining: 9.56s
290:	learn: 0.9533327	total: 28.1s	remaining: 8.59s
300:	learn: 0.9535264	total: 29s	remaining: 7.62s
310:	learn: 0.9537878	total: 30s	remaining: 6.65s
320:	learn: 0.9540072	total: 30.9s	remaining: 5.68s
330:	learn: 0.9545115	total: 31.9s	remaining: 4.72s
340:	learn: 0.9549037	total: 32.9s	remaining: 3.76s
350:	learn: 0.9549938	total: 33.7s	remaining: 2.79s
360:	learn: 0.9550922	total: 34.6s	remaining: 1.82s
370:	learn: 0.9553126	total: 35.3s	remaining: 858ms
379:	learn: 0.9554891	total: 36.2s	remaining: 0us

image.png

In [ ]:
# Hold-out experiment: interaction features with level=2, max_feats=200.

# CatBoost configuration shared by the feature-engineering experiments.
_cb_params = {
    'task_type': 'GPU',
    'iterations': 380,
    'random_state': 0,
    'eval_metric': 'AUC',
    'learning_rate': 0.08,
    'boosting_type': 'Ordered',
    'bootstrap_type': 'Bernoulli',
    'subsample': 0.8,
    'one_hot_max_size': 10,
    'leaf_estimation_iterations': 10,
    'max_ctr_complexity': 4,
}
estimator_cb = CatBoostClassifier(**_cb_params)

# The same feature-engineering settings are applied to both splits.
# NOTE(review): each split is transformed independently; whether the selected
# columns match depends on `normed_fe_interaction` -- confirm.
_fe_kwargs = {'level': 2, 'max_feats': 200, 'num_columns': num_columns}
data_train = cat_prep(
    normed_fe_interaction(feats_train[good_columns], **_fe_kwargs),
    cat_columns,
)
data_val = cat_prep(
    normed_fe_interaction(feats_val[good_columns], **_fe_kwargs),
    cat_columns,
)

# With an eval_set CatBoost keeps the best iteration on that set (see the
# "Shrink model" line in the log), so the metrics printed below are measured
# on the same data that selected the iteration count.
estimator_cb.fit(
    data_train,
    labels_train,
    cat_features=cat_columns,
    verbose=10,
    plot=True,
    eval_set=(data_val, labels_val),
)

prb = estimator_cb.predict_proba(data_val)
_val_truth = labels_val.values
_val_scores = prb[:, 1]  # probability column for label `1`

# Ranking metrics; average precision is printed under the "PRC AUC" label.
print(f'ROC AUC: {roc_auc_score(_val_truth, _val_scores)}')
print(f'PRC AUC: {average_precision_score(_val_truth, _val_scores)}')

# Hard-label report using the classifier's default decision rule.
pred = estimator_cb.predict(data_val)
print(classification_report(labels_val, pred))
_rule = '------------------'
print(_rule)
print(_rule)

# Precision/recall behaviour at a grid of probability thresholds.
_pr_thresholds = [
    0.05, 0.1, 0.2, 0.25, 0.3, 0.35, 0.4, 0.5,
    0.6, 0.7, 0.8, 0.9, 0.95, 0.96, 0.97, 0.99,
]
pr_plot(_val_truth, prb, _pr_thresholds)
0:	learn: 0.5555654	test: 0.5696517	best: 0.5696517 (0)	total: 186ms	remaining: 1m 10s
10:	learn: 0.5727133	test: 0.5781094	best: 0.5785838 (6)	total: 1.56s	remaining: 52.3s
20:	learn: 0.5800446	test: 0.5817079	best: 0.5817079 (20)	total: 2.91s	remaining: 49.8s
30:	learn: 0.6760883	test: 0.6724941	best: 0.6724941 (30)	total: 4.32s	remaining: 48.7s
40:	learn: 0.7155201	test: 0.7094522	best: 0.7094522 (40)	total: 5.88s	remaining: 48.6s
50:	learn: 0.7256931	test: 0.7167892	best: 0.7167892 (50)	total: 7.38s	remaining: 47.6s
60:	learn: 0.7329670	test: 0.7207855	best: 0.7207855 (60)	total: 8.92s	remaining: 46.6s
70:	learn: 0.7387173	test: 0.7247927	best: 0.7247927 (70)	total: 10.4s	remaining: 45.3s
80:	learn: 0.7409686	test: 0.7265265	best: 0.7267736 (77)	total: 11.9s	remaining: 43.8s
90:	learn: 0.7451937	test: 0.7274980	best: 0.7277259 (87)	total: 13.5s	remaining: 42.8s
100:	learn: 0.7472952	test: 0.7282162	best: 0.7282162 (100)	total: 15s	remaining: 41.4s
110:	learn: 0.7494383	test: 0.7292903	best: 0.7292997 (105)	total: 16.5s	remaining: 40s
120:	learn: 0.7520274	test: 0.7304577	best: 0.7306272 (117)	total: 18s	remaining: 38.6s
130:	learn: 0.7531120	test: 0.7310719	best: 0.7310719 (130)	total: 19.4s	remaining: 36.9s
140:	learn: 0.7549482	test: 0.7321022	best: 0.7323005 (139)	total: 20.8s	remaining: 35.2s
150:	learn: 0.7567631	test: 0.7328724	best: 0.7328724 (150)	total: 22.2s	remaining: 33.6s
160:	learn: 0.7577384	test: 0.7325387	best: 0.7328724 (150)	total: 23.5s	remaining: 32s
170:	learn: 0.7583409	test: 0.7326821	best: 0.7328724 (150)	total: 24.9s	remaining: 30.5s
180:	learn: 0.7589934	test: 0.7325719	best: 0.7328724 (150)	total: 26.3s	remaining: 28.9s
190:	learn: 0.7607504	test: 0.7327792	best: 0.7330014 (185)	total: 27.7s	remaining: 27.4s
200:	learn: 0.7633972	test: 0.7334957	best: 0.7336450 (195)	total: 29.2s	remaining: 26s
210:	learn: 0.7640261	test: 0.7338208	best: 0.7338459 (209)	total: 30.6s	remaining: 24.5s
220:	learn: 0.7649964	test: 0.7338403	best: 0.7338459 (209)	total: 32s	remaining: 23s
230:	learn: 0.7660651	test: 0.7340194	best: 0.7340194 (230)	total: 33.4s	remaining: 21.6s
240:	learn: 0.7673029	test: 0.7339523	best: 0.7340974 (238)	total: 35s	remaining: 20.2s
250:	learn: 0.7686166	test: 0.7342026	best: 0.7342976 (248)	total: 36.5s	remaining: 18.7s
260:	learn: 0.7699731	test: 0.7349971	best: 0.7351055 (256)	total: 37.9s	remaining: 17.3s
270:	learn: 0.7714620	test: 0.7342958	best: 0.7351055 (256)	total: 39.5s	remaining: 15.9s
280:	learn: 0.7719929	test: 0.7343108	best: 0.7351055 (256)	total: 41s	remaining: 14.4s
290:	learn: 0.7735139	test: 0.7350894	best: 0.7352166 (288)	total: 42.4s	remaining: 13s
300:	learn: 0.7744440	test: 0.7350168	best: 0.7352166 (288)	total: 43.9s	remaining: 11.5s
310:	learn: 0.7750991	test: 0.7347031	best: 0.7352166 (288)	total: 45.3s	remaining: 10s
320:	learn: 0.7760303	test: 0.7349070	best: 0.7352166 (288)	total: 46.6s	remaining: 8.57s
330:	learn: 0.7770846	test: 0.7349144	best: 0.7352166 (288)	total: 48.1s	remaining: 7.12s
340:	learn: 0.7779750	test: 0.7345192	best: 0.7352166 (288)	total: 49.5s	remaining: 5.66s
350:	learn: 0.7791748	test: 0.7349002	best: 0.7352166 (288)	total: 50.9s	remaining: 4.2s
360:	learn: 0.7802328	test: 0.7347563	best: 0.7352166 (288)	total: 52.3s	remaining: 2.75s
370:	learn: 0.7812434	test: 0.7349121	best: 0.7352166 (288)	total: 53.7s	remaining: 1.3s
379:	learn: 0.7816489	test: 0.7348305	best: 0.7352166 (288)	total: 54.9s	remaining: 0us
bestTest = 0.735216558
bestIteration = 288
Shrink model to first 289 iterations.
ROC AUC: 0.7352168678670907
PRC AUC: 0.20550301038230848
              precision    recall  f1-score   support

          -1       0.93      1.00      0.96     11107
           1       0.53      0.01      0.02       893

    accuracy                           0.93     12000
   macro avg       0.73      0.50      0.49     12000
weighted avg       0.90      0.93      0.89     12000

------------------
------------------
In [ ]:
# Full-data fit for the level=2 / max_feats=200 feature set, then submission.

# iterations=300: presumably rounded up from the best iteration (288) of the
# matching hold-out run -- confirm.
_cb_params = {
    'task_type': 'GPU',
    'iterations': 300,
    'random_state': 0,
    'eval_metric': 'AUC',
    'learning_rate': 0.08,
    'boosting_type': 'Ordered',
    'bootstrap_type': 'Bernoulli',
    'subsample': 0.8,
    'one_hot_max_size': 10,
    'leaf_estimation_iterations': 10,
    'max_ctr_complexity': 4,
}
estimator_cb = CatBoostClassifier(**_cb_params)

# Identical feature-engineering settings for the training and test frames.
# NOTE(review): the two frames are transformed independently; column
# agreement depends on `normed_fe_interaction` -- confirm.
_fe_kwargs = {'level': 2, 'max_feats': 200, 'num_columns': num_columns}
data = cat_prep(
    normed_fe_interaction(features[good_columns], **_fe_kwargs),
    cat_columns,
)
data_test = cat_prep(
    normed_fe_interaction(test_data[good_columns], **_fe_kwargs),
    cat_columns,
)

# No eval_set: every configured iteration is kept.
estimator_cb.fit(
    data,
    labels,
    cat_features=cat_columns,
    verbose=10,
    plot=True,
)

# Export the probability column for label `1`.
probs = estimator_cb.predict_proba(data_test)
_positive_scores = probs[:, 1]
write_to_submission_file(_positive_scores, out_file='submission_cb_new_fe3.csv')
0:	learn: 0.5667429	total: 166ms	remaining: 49.5s
10:	learn: 0.5737682	total: 1.56s	remaining: 41.1s
20:	learn: 0.5810381	total: 2.86s	remaining: 37.9s
30:	learn: 0.6478194	total: 4.2s	remaining: 36.5s
40:	learn: 0.7043435	total: 5.69s	remaining: 36s
50:	learn: 0.7178824	total: 7.12s	remaining: 34.8s
60:	learn: 0.7259090	total: 8.68s	remaining: 34s
70:	learn: 0.7326476	total: 10.1s	remaining: 32.6s
80:	learn: 0.7371109	total: 11.6s	remaining: 31.3s
90:	learn: 0.7412733	total: 13.1s	remaining: 30s
100:	learn: 0.7456017	total: 14.6s	remaining: 28.7s
110:	learn: 0.7492314	total: 16.1s	remaining: 27.4s
120:	learn: 0.7502860	total: 17.6s	remaining: 26s
130:	learn: 0.7518562	total: 19s	remaining: 24.5s
140:	learn: 0.7543744	total: 20.4s	remaining: 23s
150:	learn: 0.7561948	total: 21.9s	remaining: 21.6s
160:	learn: 0.7571706	total: 23.3s	remaining: 20.1s
170:	learn: 0.7586733	total: 24.7s	remaining: 18.6s
180:	learn: 0.7594815	total: 26.1s	remaining: 17.1s
190:	learn: 0.7600362	total: 27.4s	remaining: 15.7s
200:	learn: 0.7604699	total: 28.8s	remaining: 14.2s
210:	learn: 0.7612212	total: 30.1s	remaining: 12.7s
220:	learn: 0.7618280	total: 31.5s	remaining: 11.3s
230:	learn: 0.7628669	total: 32.9s	remaining: 9.82s
240:	learn: 0.7633174	total: 34.3s	remaining: 8.39s
250:	learn: 0.7639257	total: 35.6s	remaining: 6.95s
260:	learn: 0.7644042	total: 36.9s	remaining: 5.52s
270:	learn: 0.7653677	total: 38.4s	remaining: 4.11s
280:	learn: 0.7656782	total: 39.7s	remaining: 2.68s
290:	learn: 0.7668788	total: 41.1s	remaining: 1.27s
299:	learn: 0.7671291	total: 42.3s	remaining: 0us
In [ ]:
# Hold-out experiment: interaction features with level=3, max_feats=300.

# CatBoost configuration shared by the feature-engineering experiments.
_cb_params = {
    'task_type': 'GPU',
    'iterations': 380,
    'random_state': 0,
    'eval_metric': 'AUC',
    'learning_rate': 0.08,
    'boosting_type': 'Ordered',
    'bootstrap_type': 'Bernoulli',
    'subsample': 0.8,
    'one_hot_max_size': 10,
    'leaf_estimation_iterations': 10,
    'max_ctr_complexity': 4,
}
estimator_cb = CatBoostClassifier(**_cb_params)

# The same feature-engineering settings are applied to both splits.
# NOTE(review): each split is transformed independently; whether the selected
# columns match depends on `normed_fe_interaction` -- confirm.
_fe_kwargs = {'level': 3, 'max_feats': 300, 'num_columns': num_columns}
data_train = cat_prep(
    normed_fe_interaction(feats_train[good_columns], **_fe_kwargs),
    cat_columns,
)
data_val = cat_prep(
    normed_fe_interaction(feats_val[good_columns], **_fe_kwargs),
    cat_columns,
)

# With an eval_set CatBoost keeps the best iteration on that set (see the
# "Shrink model" line in the log), so the metrics printed below are measured
# on the same data that selected the iteration count.
estimator_cb.fit(
    data_train,
    labels_train,
    cat_features=cat_columns,
    verbose=10,
    plot=True,
    eval_set=(data_val, labels_val),
)

prb = estimator_cb.predict_proba(data_val)
_val_truth = labels_val.values
_val_scores = prb[:, 1]  # probability column for label `1`

# Ranking metrics; average precision is printed under the "PRC AUC" label.
print(f'ROC AUC: {roc_auc_score(_val_truth, _val_scores)}')
print(f'PRC AUC: {average_precision_score(_val_truth, _val_scores)}')

# Hard-label report using the classifier's default decision rule.
pred = estimator_cb.predict(data_val)
print(classification_report(labels_val, pred))
_rule = '------------------'
print(_rule)
print(_rule)

# Precision/recall behaviour at a grid of probability thresholds.
_pr_thresholds = [
    0.05, 0.1, 0.2, 0.25, 0.3, 0.35, 0.4, 0.5,
    0.6, 0.7, 0.8, 0.9, 0.95, 0.96, 0.97, 0.99,
]
pr_plot(_val_truth, prb, _pr_thresholds)
0:	learn: 0.5555654	test: 0.5696517	best: 0.5696517 (0)	total: 229ms	remaining: 1m 26s
10:	learn: 0.5709300	test: 0.5785761	best: 0.5785761 (10)	total: 1.66s	remaining: 55.6s
20:	learn: 0.5779587	test: 0.5830868	best: 0.5830868 (20)	total: 3.07s	remaining: 52.5s
30:	learn: 0.6677980	test: 0.6607206	best: 0.6607206 (30)	total: 4.64s	remaining: 52.3s
40:	learn: 0.7176700	test: 0.7086438	best: 0.7086438 (40)	total: 6.3s	remaining: 52.1s
50:	learn: 0.7245792	test: 0.7141760	best: 0.7141760 (50)	total: 7.95s	remaining: 51.3s
60:	learn: 0.7313309	test: 0.7192411	best: 0.7196368 (54)	total: 9.59s	remaining: 50.2s
70:	learn: 0.7381613	test: 0.7231347	best: 0.7231347 (70)	total: 11.2s	remaining: 48.8s
80:	learn: 0.7439606	test: 0.7258459	best: 0.7258459 (80)	total: 12.8s	remaining: 47.3s
90:	learn: 0.7489358	test: 0.7280552	best: 0.7281509 (88)	total: 14.4s	remaining: 45.9s
100:	learn: 0.7516595	test: 0.7294889	best: 0.7294889 (100)	total: 16.1s	remaining: 44.4s
110:	learn: 0.7533149	test: 0.7295910	best: 0.7297969 (104)	total: 17.7s	remaining: 42.8s
120:	learn: 0.7553614	test: 0.7298191	best: 0.7298726 (116)	total: 19.3s	remaining: 41.3s
130:	learn: 0.7573925	test: 0.7305291	best: 0.7305291 (130)	total: 20.8s	remaining: 39.6s
140:	learn: 0.7594609	test: 0.7314605	best: 0.7314605 (140)	total: 22.3s	remaining: 37.8s
150:	learn: 0.7621491	test: 0.7332797	best: 0.7332859 (149)	total: 23.9s	remaining: 36.2s
160:	learn: 0.7635958	test: 0.7328248	best: 0.7332859 (149)	total: 25.3s	remaining: 34.5s
170:	learn: 0.7639247	test: 0.7328582	best: 0.7332859 (149)	total: 26.8s	remaining: 32.8s
180:	learn: 0.7652149	test: 0.7325749	best: 0.7332859 (149)	total: 28.3s	remaining: 31.2s
190:	learn: 0.7658996	test: 0.7321733	best: 0.7332859 (149)	total: 29.9s	remaining: 29.6s
200:	learn: 0.7675727	test: 0.7323743	best: 0.7332859 (149)	total: 31.4s	remaining: 28s
210:	learn: 0.7689368	test: 0.7328518	best: 0.7332859 (149)	total: 32.9s	remaining: 26.4s
220:	learn: 0.7699466	test: 0.7324936	best: 0.7332859 (149)	total: 34.5s	remaining: 24.8s
230:	learn: 0.7708967	test: 0.7326337	best: 0.7332859 (149)	total: 36s	remaining: 23.2s
240:	learn: 0.7723292	test: 0.7323572	best: 0.7332859 (149)	total: 37.5s	remaining: 21.6s
250:	learn: 0.7733848	test: 0.7321531	best: 0.7332859 (149)	total: 39s	remaining: 20s
260:	learn: 0.7744196	test: 0.7315725	best: 0.7332859 (149)	total: 40.5s	remaining: 18.5s
270:	learn: 0.7750433	test: 0.7311691	best: 0.7332859 (149)	total: 42s	remaining: 16.9s
280:	learn: 0.7759895	test: 0.7314528	best: 0.7332859 (149)	total: 43.5s	remaining: 15.3s
290:	learn: 0.7763931	test: 0.7315274	best: 0.7332859 (149)	total: 44.9s	remaining: 13.7s
300:	learn: 0.7774559	test: 0.7313842	best: 0.7332859 (149)	total: 46.3s	remaining: 12.1s
310:	learn: 0.7779290	test: 0.7312467	best: 0.7332859 (149)	total: 47.7s	remaining: 10.6s
320:	learn: 0.7789063	test: 0.7314481	best: 0.7332859 (149)	total: 49.2s	remaining: 9.05s
330:	learn: 0.7805979	test: 0.7321916	best: 0.7332859 (149)	total: 50.8s	remaining: 7.52s
340:	learn: 0.7812602	test: 0.7319290	best: 0.7332859 (149)	total: 52.2s	remaining: 5.97s
350:	learn: 0.7822564	test: 0.7321883	best: 0.7332859 (149)	total: 53.6s	remaining: 4.43s
360:	learn: 0.7830952	test: 0.7328379	best: 0.7332859 (149)	total: 55.1s	remaining: 2.9s
370:	learn: 0.7843433	test: 0.7324312	best: 0.7332859 (149)	total: 56.6s	remaining: 1.37s
379:	learn: 0.7852314	test: 0.7322131	best: 0.7332859 (149)	total: 57.9s	remaining: 0us
bestTest = 0.7332859337
bestIteration = 149
Shrink model to first 150 iterations.
ROC AUC: 0.7332860414792444
PRC AUC: 0.20287010224142016
              precision    recall  f1-score   support

          -1       0.93      1.00      0.96     11107
           1       0.50      0.01      0.01       893

    accuracy                           0.93     12000
   macro avg       0.71      0.50      0.49     12000
weighted avg       0.89      0.93      0.89     12000

------------------
------------------
In [ ]:
# Full-data fit for the level=3 / max_feats=300 feature set, then submission.

# iterations=180: presumably chosen from the best iteration (149) of the
# matching hold-out run, with some headroom -- confirm.
_cb_params = {
    'task_type': 'GPU',
    'iterations': 180,
    'random_state': 0,
    'eval_metric': 'AUC',
    'learning_rate': 0.08,
    'boosting_type': 'Ordered',
    'bootstrap_type': 'Bernoulli',
    'subsample': 0.8,
    'one_hot_max_size': 10,
    'leaf_estimation_iterations': 10,
    'max_ctr_complexity': 4,
}
estimator_cb = CatBoostClassifier(**_cb_params)

# Identical feature-engineering settings for the training and test frames.
# NOTE(review): the two frames are transformed independently; column
# agreement depends on `normed_fe_interaction` -- confirm.
_fe_kwargs = {'level': 3, 'max_feats': 300, 'num_columns': num_columns}
data = cat_prep(
    normed_fe_interaction(features[good_columns], **_fe_kwargs),
    cat_columns,
)
data_test = cat_prep(
    normed_fe_interaction(test_data[good_columns], **_fe_kwargs),
    cat_columns,
)

# No eval_set: every configured iteration is kept.
estimator_cb.fit(
    data,
    labels,
    cat_features=cat_columns,
    verbose=10,
    plot=True,
)

# Export the probability column for label `1`.
probs = estimator_cb.predict_proba(data_test)
_positive_scores = probs[:, 1]
write_to_submission_file(_positive_scores, out_file='submission_cb_new_fe4.csv')
0:	learn: 0.5765340	total: 214ms	remaining: 38.3s
10:	learn: 0.5799161	total: 1.69s	remaining: 26s
20:	learn: 0.5870620	total: 3.11s	remaining: 23.6s
30:	learn: 0.6518445	total: 4.58s	remaining: 22s
40:	learn: 0.7031722	total: 6.19s	remaining: 21s
50:	learn: 0.7182985	total: 7.71s	remaining: 19.5s
60:	learn: 0.7273776	total: 9.42s	remaining: 18.4s
70:	learn: 0.7312863	total: 10.9s	remaining: 16.7s
80:	learn: 0.7365586	total: 12.5s	remaining: 15.3s
90:	learn: 0.7394513	total: 14s	remaining: 13.7s
100:	learn: 0.7420845	total: 15.7s	remaining: 12.3s
110:	learn: 0.7443395	total: 17.3s	remaining: 10.8s
120:	learn: 0.7468562	total: 18.9s	remaining: 9.22s
130:	learn: 0.7501698	total: 20.5s	remaining: 7.65s
140:	learn: 0.7526821	total: 21.9s	remaining: 6.07s
150:	learn: 0.7537583	total: 23.4s	remaining: 4.5s
160:	learn: 0.7554334	total: 24.9s	remaining: 2.94s
170:	learn: 0.7564258	total: 26.4s	remaining: 1.39s
179:	learn: 0.7574179	total: 27.7s	remaining: 0us
In [ ]:
# Hold-out experiment: interaction features with level=4, max_feats=300.

# CatBoost configuration shared by the feature-engineering experiments.
_cb_params = {
    'task_type': 'GPU',
    'iterations': 380,
    'random_state': 0,
    'eval_metric': 'AUC',
    'learning_rate': 0.08,
    'boosting_type': 'Ordered',
    'bootstrap_type': 'Bernoulli',
    'subsample': 0.8,
    'one_hot_max_size': 10,
    'leaf_estimation_iterations': 10,
    'max_ctr_complexity': 4,
}
estimator_cb = CatBoostClassifier(**_cb_params)

# The same feature-engineering settings are applied to both splits.
# NOTE(review): each split is transformed independently; whether the selected
# columns match depends on `normed_fe_interaction` -- confirm.
_fe_kwargs = {'level': 4, 'max_feats': 300, 'num_columns': num_columns}
data_train = cat_prep(
    normed_fe_interaction(feats_train[good_columns], **_fe_kwargs),
    cat_columns,
)
data_val = cat_prep(
    normed_fe_interaction(feats_val[good_columns], **_fe_kwargs),
    cat_columns,
)

# With an eval_set CatBoost keeps the best iteration on that set (see the
# "Shrink model" line in the log), so the metrics printed below are measured
# on the same data that selected the iteration count.
estimator_cb.fit(
    data_train,
    labels_train,
    cat_features=cat_columns,
    verbose=10,
    plot=True,
    eval_set=(data_val, labels_val),
)

prb = estimator_cb.predict_proba(data_val)
_val_truth = labels_val.values
_val_scores = prb[:, 1]  # probability column for label `1`

# Ranking metrics; average precision is printed under the "PRC AUC" label.
print(f'ROC AUC: {roc_auc_score(_val_truth, _val_scores)}')
print(f'PRC AUC: {average_precision_score(_val_truth, _val_scores)}')

# Hard-label report using the classifier's default decision rule.
pred = estimator_cb.predict(data_val)
print(classification_report(labels_val, pred))
_rule = '------------------'
print(_rule)
print(_rule)

# Precision/recall behaviour at a grid of probability thresholds.
_pr_thresholds = [
    0.05, 0.1, 0.2, 0.25, 0.3, 0.35, 0.4, 0.5,
    0.6, 0.7, 0.8, 0.9, 0.95, 0.96, 0.97, 0.99,
]
pr_plot(_val_truth, prb, _pr_thresholds)
0:	learn: 0.5555654	test: 0.5696517	best: 0.5696517 (0)	total: 210ms	remaining: 1m 19s
10:	learn: 0.5709300	test: 0.5785761	best: 0.5785761 (10)	total: 1.67s	remaining: 56.1s
20:	learn: 0.5779587	test: 0.5830868	best: 0.5830868 (20)	total: 3.05s	remaining: 52.1s
30:	learn: 0.6677980	test: 0.6607206	best: 0.6607206 (30)	total: 4.61s	remaining: 51.9s
40:	learn: 0.7124102	test: 0.7036034	best: 0.7036034 (40)	total: 6.25s	remaining: 51.6s
50:	learn: 0.7223031	test: 0.7120764	best: 0.7120764 (50)	total: 7.87s	remaining: 50.8s
60:	learn: 0.7315007	test: 0.7200186	best: 0.7200186 (60)	total: 9.49s	remaining: 49.6s
70:	learn: 0.7366947	test: 0.7230960	best: 0.7230960 (70)	total: 11.1s	remaining: 48.4s
80:	learn: 0.7452885	test: 0.7279401	best: 0.7279401 (80)	total: 12.8s	remaining: 47.1s
90:	learn: 0.7498814	test: 0.7305478	best: 0.7305478 (90)	total: 14.4s	remaining: 45.8s
100:	learn: 0.7536936	test: 0.7324928	best: 0.7324928 (100)	total: 16.1s	remaining: 44.4s
110:	learn: 0.7561547	test: 0.7333241	best: 0.7333794 (108)	total: 17.7s	remaining: 42.8s
120:	learn: 0.7587497	test: 0.7336738	best: 0.7339002 (114)	total: 19.3s	remaining: 41.3s
130:	learn: 0.7603919	test: 0.7343600	best: 0.7343600 (130)	total: 20.8s	remaining: 39.6s
140:	learn: 0.7621326	test: 0.7347343	best: 0.7347343 (140)	total: 22.3s	remaining: 37.8s
150:	learn: 0.7630398	test: 0.7349322	best: 0.7349322 (150)	total: 23.8s	remaining: 36.2s
160:	learn: 0.7641883	test: 0.7347896	best: 0.7349322 (150)	total: 25.4s	remaining: 34.5s
170:	learn: 0.7651099	test: 0.7346004	best: 0.7349322 (150)	total: 26.9s	remaining: 32.9s
180:	learn: 0.7664167	test: 0.7347772	best: 0.7349322 (150)	total: 28.4s	remaining: 31.3s
190:	learn: 0.7679494	test: 0.7345427	best: 0.7349322 (150)	total: 29.9s	remaining: 29.6s
200:	learn: 0.7688094	test: 0.7346942	best: 0.7349322 (150)	total: 31.4s	remaining: 27.9s
210:	learn: 0.7698445	test: 0.7347760	best: 0.7349984 (209)	total: 32.9s	remaining: 26.3s
220:	learn: 0.7705883	test: 0.7345798	best: 0.7349984 (209)	total: 34.4s	remaining: 24.7s
230:	learn: 0.7716129	test: 0.7341936	best: 0.7349984 (209)	total: 35.8s	remaining: 23.1s
240:	learn: 0.7721492	test: 0.7343276	best: 0.7349984 (209)	total: 37.3s	remaining: 21.5s
250:	learn: 0.7729778	test: 0.7342165	best: 0.7349984 (209)	total: 38.8s	remaining: 20s
260:	learn: 0.7735138	test: 0.7338345	best: 0.7349984 (209)	total: 40.4s	remaining: 18.4s
270:	learn: 0.7742584	test: 0.7335395	best: 0.7349984 (209)	total: 41.9s	remaining: 16.9s
280:	learn: 0.7753893	test: 0.7339402	best: 0.7349984 (209)	total: 43.4s	remaining: 15.3s
290:	learn: 0.7766983	test: 0.7341620	best: 0.7349984 (209)	total: 44.9s	remaining: 13.7s
300:	learn: 0.7770339	test: 0.7338479	best: 0.7349984 (209)	total: 46.4s	remaining: 12.2s
310:	learn: 0.7775496	test: 0.7335458	best: 0.7349984 (209)	total: 47.9s	remaining: 10.6s
320:	learn: 0.7786137	test: 0.7332474	best: 0.7349984 (209)	total: 49.3s	remaining: 9.06s
330:	learn: 0.7788270	test: 0.7332349	best: 0.7349984 (209)	total: 50.7s	remaining: 7.51s
340:	learn: 0.7792167	test: 0.7328802	best: 0.7349984 (209)	total: 52.2s	remaining: 5.97s
350:	learn: 0.7796662	test: 0.7329763	best: 0.7349984 (209)	total: 53.6s	remaining: 4.42s
360:	learn: 0.7802175	test: 0.7329451	best: 0.7349984 (209)	total: 55s	remaining: 2.9s
370:	learn: 0.7809888	test: 0.7325402	best: 0.7349984 (209)	total: 56.4s	remaining: 1.37s
379:	learn: 0.7815396	test: 0.7325450	best: 0.7349984 (209)	total: 57.7s	remaining: 0us
bestTest = 0.734998405
bestIteration = 209
Shrink model to first 210 iterations.
ROC AUC: 0.7349984891946415
PRC AUC: 0.20296086399117608
              precision    recall  f1-score   support

          -1       0.93      1.00      0.96     11107
           1       0.57      0.01      0.02       893

    accuracy                           0.93     12000
   macro avg       0.75      0.50      0.49     12000
weighted avg       0.90      0.93      0.89     12000

------------------
------------------
In [ ]:
# Full-data fit for the level=4 / max_feats=300 feature set, then submission.

# iterations=250: presumably chosen from the best iteration (209) of the
# matching hold-out run, with some headroom -- confirm.
_cb_params = {
    'task_type': 'GPU',
    'iterations': 250,
    'random_state': 0,
    'eval_metric': 'AUC',
    'learning_rate': 0.08,
    'boosting_type': 'Ordered',
    'bootstrap_type': 'Bernoulli',
    'subsample': 0.8,
    'one_hot_max_size': 10,
    'leaf_estimation_iterations': 10,
    'max_ctr_complexity': 4,
}
estimator_cb = CatBoostClassifier(**_cb_params)

# Identical feature-engineering settings for the training and test frames.
# NOTE(review): the two frames are transformed independently; column
# agreement depends on `normed_fe_interaction` -- confirm.
_fe_kwargs = {'level': 4, 'max_feats': 300, 'num_columns': num_columns}
data = cat_prep(
    normed_fe_interaction(features[good_columns], **_fe_kwargs),
    cat_columns,
)
data_test = cat_prep(
    normed_fe_interaction(test_data[good_columns], **_fe_kwargs),
    cat_columns,
)

# No eval_set: every configured iteration is kept.
estimator_cb.fit(
    data,
    labels,
    cat_features=cat_columns,
    verbose=10,
    plot=True,
)

# Export the probability column for label `1`.
probs = estimator_cb.predict_proba(data_test)
_positive_scores = probs[:, 1]
write_to_submission_file(_positive_scores, out_file='submission_cb_new_fe5.csv')
0:	learn: 0.5765340	total: 200ms	remaining: 49.7s
10:	learn: 0.5799161	total: 1.67s	remaining: 36.2s
20:	learn: 0.5870620	total: 3.06s	remaining: 33.3s
30:	learn: 0.6518445	total: 4.54s	remaining: 32s
40:	learn: 0.7019444	total: 6.16s	remaining: 31.4s
50:	learn: 0.7162397	total: 7.72s	remaining: 30.1s
60:	learn: 0.7286017	total: 9.42s	remaining: 29.2s
70:	learn: 0.7319286	total: 11s	remaining: 27.7s
80:	learn: 0.7376823	total: 12.6s	remaining: 26.3s
90:	learn: 0.7398947	total: 14.3s	remaining: 24.9s
100:	learn: 0.7423709	total: 15.9s	remaining: 23.4s
110:	learn: 0.7449867	total: 17.5s	remaining: 21.9s
120:	learn: 0.7463365	total: 19.1s	remaining: 20.4s
130:	learn: 0.7498860	total: 20.7s	remaining: 18.8s
140:	learn: 0.7529285	total: 22.3s	remaining: 17.2s
150:	learn: 0.7542704	total: 23.8s	remaining: 15.6s
160:	learn: 0.7565077	total: 25.4s	remaining: 14s
170:	learn: 0.7571968	total: 26.9s	remaining: 12.4s
180:	learn: 0.7578454	total: 28.4s	remaining: 10.8s
190:	learn: 0.7588466	total: 29.8s	remaining: 9.2s
200:	learn: 0.7594658	total: 31.3s	remaining: 7.62s
210:	learn: 0.7602586	total: 32.8s	remaining: 6.05s
220:	learn: 0.7611132	total: 34.2s	remaining: 4.49s
230:	learn: 0.7621207	total: 35.7s	remaining: 2.94s
240:	learn: 0.7627892	total: 37.1s	remaining: 1.39s
249:	learn: 0.7636247	total: 38.5s	remaining: 0us
In [ ]:
# Hold-out experiment: interaction features with level=5, max_feats=300.

# CatBoost configuration shared by the feature-engineering experiments.
_cb_params = {
    'task_type': 'GPU',
    'iterations': 380,
    'random_state': 0,
    'eval_metric': 'AUC',
    'learning_rate': 0.08,
    'boosting_type': 'Ordered',
    'bootstrap_type': 'Bernoulli',
    'subsample': 0.8,
    'one_hot_max_size': 10,
    'leaf_estimation_iterations': 10,
    'max_ctr_complexity': 4,
}
estimator_cb = CatBoostClassifier(**_cb_params)

# The same feature-engineering settings are applied to both splits.
# NOTE(review): each split is transformed independently; whether the selected
# columns match depends on `normed_fe_interaction` -- confirm.
_fe_kwargs = {'level': 5, 'max_feats': 300, 'num_columns': num_columns}
data_train = cat_prep(
    normed_fe_interaction(feats_train[good_columns], **_fe_kwargs),
    cat_columns,
)
data_val = cat_prep(
    normed_fe_interaction(feats_val[good_columns], **_fe_kwargs),
    cat_columns,
)

# With an eval_set CatBoost keeps the best iteration on that set (see the
# "Shrink model" line in the log), so the metrics printed below are measured
# on the same data that selected the iteration count.
estimator_cb.fit(
    data_train,
    labels_train,
    cat_features=cat_columns,
    verbose=10,
    plot=True,
    eval_set=(data_val, labels_val),
)

prb = estimator_cb.predict_proba(data_val)
_val_truth = labels_val.values
_val_scores = prb[:, 1]  # probability column for label `1`

# Ranking metrics; average precision is printed under the "PRC AUC" label.
print(f'ROC AUC: {roc_auc_score(_val_truth, _val_scores)}')
print(f'PRC AUC: {average_precision_score(_val_truth, _val_scores)}')

# Hard-label report using the classifier's default decision rule.
pred = estimator_cb.predict(data_val)
print(classification_report(labels_val, pred))
_rule = '------------------'
print(_rule)
print(_rule)

# Precision/recall behaviour at a grid of probability thresholds.
_pr_thresholds = [
    0.05, 0.1, 0.2, 0.25, 0.3, 0.35, 0.4, 0.5,
    0.6, 0.7, 0.8, 0.9, 0.95, 0.96, 0.97, 0.99,
]
pr_plot(_val_truth, prb, _pr_thresholds)
0:	learn: 0.5555654	test: 0.5696517	best: 0.5696517 (0)	total: 211ms	remaining: 1m 19s
10:	learn: 0.5709300	test: 0.5785761	best: 0.5785761 (10)	total: 1.69s	remaining: 56.6s
20:	learn: 0.5779587	test: 0.5830868	best: 0.5830868 (20)	total: 3.08s	remaining: 52.8s
30:	learn: 0.6677980	test: 0.6607206	best: 0.6607206 (30)	total: 4.64s	remaining: 52.2s
40:	learn: 0.7124102	test: 0.7036034	best: 0.7036034 (40)	total: 6.28s	remaining: 52s
50:	learn: 0.7207803	test: 0.7090081	best: 0.7090081 (50)	total: 7.92s	remaining: 51.1s
60:	learn: 0.7300992	test: 0.7172233	best: 0.7174205 (54)	total: 9.51s	remaining: 49.7s
70:	learn: 0.7371882	test: 0.7223321	best: 0.7223321 (70)	total: 11.2s	remaining: 48.8s
80:	learn: 0.7405784	test: 0.7244780	best: 0.7244780 (80)	total: 12.8s	remaining: 47.2s
90:	learn: 0.7462393	test: 0.7270537	best: 0.7274523 (89)	total: 14.4s	remaining: 45.7s
100:	learn: 0.7488309	test: 0.7290478	best: 0.7290478 (100)	total: 16s	remaining: 44.1s
110:	learn: 0.7509729	test: 0.7295904	best: 0.7295904 (110)	total: 17.6s	remaining: 42.6s
120:	learn: 0.7533730	test: 0.7311198	best: 0.7311333 (119)	total: 19.2s	remaining: 41s
130:	learn: 0.7553961	test: 0.7319716	best: 0.7319716 (130)	total: 20.7s	remaining: 39.4s
140:	learn: 0.7579612	test: 0.7336211	best: 0.7337087 (139)	total: 22.2s	remaining: 37.6s
150:	learn: 0.7594694	test: 0.7340060	best: 0.7341454 (143)	total: 23.7s	remaining: 35.9s
160:	learn: 0.7607458	test: 0.7341571	best: 0.7341571 (160)	total: 25.2s	remaining: 34.2s
170:	learn: 0.7622864	test: 0.7335684	best: 0.7341571 (160)	total: 26.6s	remaining: 32.6s
180:	learn: 0.7630388	test: 0.7333955	best: 0.7341571 (160)	total: 28.1s	remaining: 30.9s
190:	learn: 0.7644662	test: 0.7330844	best: 0.7341571 (160)	total: 29.6s	remaining: 29.3s
200:	learn: 0.7662007	test: 0.7325102	best: 0.7341571 (160)	total: 31.2s	remaining: 27.8s
210:	learn: 0.7671281	test: 0.7323653	best: 0.7341571 (160)	total: 32.7s	remaining: 26.2s
220:	learn: 0.7680160	test: 0.7318060	best: 0.7341571 (160)	total: 34.2s	remaining: 24.6s
230:	learn: 0.7689944	test: 0.7318511	best: 0.7341571 (160)	total: 35.8s	remaining: 23.1s
240:	learn: 0.7700739	test: 0.7319920	best: 0.7341571 (160)	total: 37.3s	remaining: 21.5s
250:	learn: 0.7711785	test: 0.7316184	best: 0.7341571 (160)	total: 38.8s	remaining: 19.9s
260:	learn: 0.7722654	test: 0.7312644	best: 0.7341571 (160)	total: 40.4s	remaining: 18.4s
270:	learn: 0.7736086	test: 0.7318873	best: 0.7341571 (160)	total: 41.8s	remaining: 16.8s
280:	learn: 0.7749366	test: 0.7326388	best: 0.7341571 (160)	total: 43.4s	remaining: 15.3s
290:	learn: 0.7753859	test: 0.7321940	best: 0.7341571 (160)	total: 44.8s	remaining: 13.7s
300:	learn: 0.7762171	test: 0.7320569	best: 0.7341571 (160)	total: 46.2s	remaining: 12.1s
310:	learn: 0.7767181	test: 0.7317837	best: 0.7341571 (160)	total: 47.7s	remaining: 10.6s
320:	learn: 0.7778807	test: 0.7313338	best: 0.7341571 (160)	total: 49.2s	remaining: 9.04s
330:	learn: 0.7784521	test: 0.7312597	best: 0.7341571 (160)	total: 50.7s	remaining: 7.5s
340:	learn: 0.7788988	test: 0.7309624	best: 0.7341571 (160)	total: 52s	remaining: 5.95s
350:	learn: 0.7796791	test: 0.7311764	best: 0.7341571 (160)	total: 53.4s	remaining: 4.41s
360:	learn: 0.7803776	test: 0.7310998	best: 0.7341571 (160)	total: 54.8s	remaining: 2.88s
370:	learn: 0.7809112	test: 0.7307486	best: 0.7341571 (160)	total: 56.2s	remaining: 1.36s
379:	learn: 0.7824081	test: 0.7301829	best: 0.7341571 (160)	total: 57.6s	remaining: 0us
bestTest = 0.734157145
bestIteration = 160
Shrink model to first 161 iterations.
ROC AUC: 0.7341570356395808
PRC AUC: 0.20524651984083675
              precision    recall  f1-score   support

          -1       0.93      1.00      0.96     11107
           1       0.67      0.01      0.02       893

    accuracy                           0.93     12000
   macro avg       0.80      0.50      0.49     12000
weighted avg       0.91      0.93      0.89     12000

------------------
------------------
In [ ]:
# Fit on `features`/`labels` without an eval_set and score the test frame.
# The CatBoost hyperparameters are gathered in one mapping for readability.
cb_settings = {
    'task_type': 'GPU',
    'iterations': 250,
    'random_state': 0,
    'eval_metric': 'AUC',
    'learning_rate': 0.08,
    'boosting_type': 'Ordered',
    'bootstrap_type': 'Bernoulli',
    'subsample': 0.8,
    'one_hot_max_size': 10,
    'leaf_estimation_iterations': 10,
    'max_ctr_complexity': 4,
}
estimator_cb = CatBoostClassifier(**cb_settings)

# The same interaction-feature settings are applied to the train and test frames.
fe_settings = {'level': 5, 'max_feats': 400, 'num_columns': num_columns}
data = cat_prep(
    normed_fe_interaction(features[good_columns], **fe_settings),
    cat_columns,
)
data_test = cat_prep(
    normed_fe_interaction(test_data[good_columns], **fe_settings),
    cat_columns,
)

estimator_cb.fit(data, labels, cat_features=cat_columns, verbose=10, plot=True)

# The second predict_proba column is what gets written to the submission file.
probs = estimator_cb.predict_proba(data_test)
write_to_submission_file(probs[:, 1], out_file='submission_cb_new_fe6.csv')
0:	learn: 0.5765340	total: 212ms	remaining: 52.9s
10:	learn: 0.5768688	total: 1.77s	remaining: 38.6s
20:	learn: 0.5865380	total: 3.25s	remaining: 35.5s
30:	learn: 0.6364463	total: 4.77s	remaining: 33.7s
40:	learn: 0.6955057	total: 6.38s	remaining: 32.5s
50:	learn: 0.7145198	total: 8.09s	remaining: 31.6s
60:	learn: 0.7219178	total: 9.77s	remaining: 30.3s
70:	learn: 0.7286468	total: 11.3s	remaining: 28.4s
80:	learn: 0.7331868	total: 12.9s	remaining: 27s
90:	learn: 0.7365956	total: 14.6s	remaining: 25.5s
100:	learn: 0.7423750	total: 16.3s	remaining: 24.1s
110:	learn: 0.7454753	total: 18s	remaining: 22.6s
120:	learn: 0.7475723	total: 19.6s	remaining: 20.9s
130:	learn: 0.7508287	total: 21.4s	remaining: 19.4s
140:	learn: 0.7526286	total: 22.9s	remaining: 17.7s
150:	learn: 0.7540284	total: 24.5s	remaining: 16.1s
160:	learn: 0.7560124	total: 26.2s	remaining: 14.5s
170:	learn: 0.7575312	total: 27.7s	remaining: 12.8s
180:	learn: 0.7582612	total: 29.3s	remaining: 11.2s
190:	learn: 0.7595958	total: 30.9s	remaining: 9.56s
200:	learn: 0.7606539	total: 32.6s	remaining: 7.95s
210:	learn: 0.7618932	total: 34.3s	remaining: 6.33s
220:	learn: 0.7627231	total: 35.8s	remaining: 4.7s
230:	learn: 0.7637294	total: 37.4s	remaining: 3.08s
240:	learn: 0.7649786	total: 39s	remaining: 1.46s
249:	learn: 0.7657140	total: 40.5s	remaining: 0us

image.png

In [ ]:
# Blend earlier submissions: read each CSV, place them side by side and
# write the row-wise mean as a new submission.
sub_names = [
    'submission_cb_new_fe1',
    'submission_mean3',
    'submission_mean4',
    'submission_mean5',
    'submission_cb_new_fe6',
    'submission_mean6',
    'submission_mean7',
    'submission_mean8',
]
submission_list = [
    pd.read_csv(f'{submission_name}.csv', index_col=0)
    for submission_name in sub_names
]
sub_df = pd.concat(submission_list, axis=1)
# The mean is taken before the 'mean' column exists, so it covers only the inputs.
sub_df['mean'] = sub_df.mean(axis=1)
write_to_submission_file(sub_df[['mean']].values, out_file='submission_mean9.csv')

image.png

In [ ]:
 
In [21]:
# Stage 1: split the raw training columns into numeric / categorical groups.
# params = [prop_nan, max_prop_unique, N_lim]
[num_inds, cat_indices1,cat_indices2]= data_preprocessor(feats_train,params=[0.7, 0.5, 30])
# The index arrays were computed on feats_train, so they are resolved against
# feats_train's own columns (not those of another frame).
cat_columns0 = list(np.array(feats_train.columns)[cat_indices1]) + list(np.array(feats_train.columns)[cat_indices2])
num_columns0 = list(np.array(feats_train.columns)[num_inds])
good_columns0 = num_columns0 + cat_columns0

print(num_columns0)

# Stage-1 interaction features (level=2, at most 60) for the train and validation splits.
data_train0 = cat_prep(normed_fe_interaction(feats_train[good_columns0],level=2, max_feats = 60,num_columns=num_columns0),cat_columns0)

data_val0 = cat_prep(normed_fe_interaction(feats_val[good_columns0],level=2, max_feats = 60,num_columns=num_columns0),cat_columns0)


# Stage 2: re-run the column typing on the stage-1 frame, so the generated
# columns take part in the next round of interactions.
[num_inds, cat_indices1,cat_indices2]= data_preprocessor(data_train0,params=[0.7, 0.5, 30])# params = [prop_nan,max_prop_unique,N_lim]
cat_columns1 = list(np.array(data_train0.columns)[cat_indices1]) + list(np.array(data_train0.columns)[cat_indices2])
num_columns1 = list(np.array(data_train0.columns)[num_inds])
good_columns1 = num_columns1 + cat_columns1

print(num_columns1)

# Stage-2 interaction features (level=5, at most 150) built on top of the stage-1 columns.
data_train1 = cat_prep(normed_fe_interaction(data_train0[good_columns1],level=5, max_feats = 150,num_columns=num_columns1),cat_columns1)

data_val1 = cat_prep(normed_fe_interaction(data_val0[good_columns1],level=5, max_feats = 150,num_columns=num_columns1),cat_columns1)
['Var6', 'Var13', 'Var21', 'Var22', 'Var24', 'Var25', 'Var28', 'Var38', 'Var57', 'Var73', 'Var74', 'Var76', 'Var81', 'Var83', 'Var85', 'Var94', 'Var109', 'Var112', 'Var113', 'Var119', 'Var123', 'Var125', 'Var126', 'Var133', 'Var134', 'Var140', 'Var149', 'Var153', 'Var160', 'Var163', 'Var189']
['Var6', 'Var13', 'Var21', 'Var22', 'Var24', 'Var25', 'Var28', 'Var38', 'Var57', 'Var73', 'Var74', 'Var76', 'Var81', 'Var83', 'Var85', 'Var94', 'Var109', 'Var112', 'Var113', 'Var119', 'Var123', 'Var125', 'Var126', 'Var133', 'Var134', 'Var140', 'Var149', 'Var153', 'Var160', 'Var163', 'Var189', 'Var6,Var13_mix', 'Var6,Var21_mix', 'Var6,Var22_mix', 'Var6,Var24_mix', 'Var6,Var25_mix', 'Var6,Var28_mix', 'Var6,Var38_mix', 'Var6,Var57_mix', 'Var6,Var73_mix', 'Var6,Var74_mix', 'Var6,Var76_mix', 'Var6,Var81_mix', 'Var6,Var83_mix', 'Var6,Var85_mix', 'Var6,Var94_mix', 'Var6,Var109_mix', 'Var6,Var112_mix', 'Var6,Var113_mix', 'Var6,Var119_mix', 'Var6,Var123_mix', 'Var6,Var125_mix', 'Var6,Var126_mix', 'Var6,Var133_mix', 'Var6,Var134_mix', 'Var6,Var140_mix', 'Var6,Var149_mix', 'Var6,Var153_mix', 'Var6,Var160_mix', 'Var6,Var163_mix', 'Var6,Var189_mix', 'Var13,Var21_mix', 'Var13,Var22_mix', 'Var13,Var24_mix', 'Var13,Var25_mix', 'Var13,Var28_mix', 'Var13,Var38_mix', 'Var13,Var57_mix', 'Var13,Var73_mix', 'Var13,Var74_mix', 'Var13,Var76_mix', 'Var13,Var81_mix', 'Var13,Var83_mix', 'Var13,Var85_mix', 'Var13,Var94_mix', 'Var13,Var109_mix', 'Var13,Var112_mix', 'Var13,Var113_mix', 'Var13,Var119_mix', 'Var13,Var123_mix', 'Var13,Var125_mix', 'Var13,Var126_mix', 'Var13,Var133_mix', 'Var13,Var134_mix', 'Var13,Var140_mix', 'Var13,Var149_mix', 'Var13,Var153_mix', 'Var13,Var160_mix', 'Var13,Var163_mix', 'Var13,Var189_mix', 'Var21,Var22_mix']
In [22]:
# Validation run on the two-stage feature set: the hold-out split is passed
# as eval_set and then scored again below.
cb_settings = {
    'task_type': 'GPU',
    'iterations': 380,
    'random_state': 0,
    'eval_metric': 'AUC',
    'learning_rate': 0.08,
    'boosting_type': 'Ordered',
    'bootstrap_type': 'Bernoulli',
    'subsample': 0.8,
    'one_hot_max_size': 10,
    'leaf_estimation_iterations': 10,
    'max_ctr_complexity': 4,
}
estimator_cb = CatBoostClassifier(**cb_settings)

estimator_cb.fit(
    data_train1,
    labels_train,
    cat_features=cat_columns1,
    verbose=10,
    plot=True,
    eval_set=(data_val1, labels_val),
)

# Ranking metrics use the second predict_proba column.
prb = estimator_cb.predict_proba(data_val1)
print(f'ROC AUC: {roc_auc_score(labels_val.values,prb[:,1])}')
print(f'PRC AUC: {average_precision_score(labels_val.values,prb[:,1])}')

# Hard-label report from the model's own predict().
pred = estimator_cb.predict(data_val1)
print(classification_report(labels_val, pred))
print('------------------')
print('------------------')

pr_plot(
    labels_val.values,
    prb,
    [0.05, 0.1, 0.2, 0.25, 0.3, 0.35, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.96, 0.97, 0.99],
)
0:	learn: 0.5506279	test: 0.5668173	best: 0.5668173 (0)	total: 191ms	remaining: 1m 12s
10:	learn: 0.5695241	test: 0.5773915	best: 0.5774430 (9)	total: 1.62s	remaining: 54.3s
20:	learn: 0.5836996	test: 0.5883385	best: 0.5883385 (20)	total: 3.06s	remaining: 52.3s
30:	learn: 0.6523662	test: 0.6526531	best: 0.6526531 (30)	total: 4.56s	remaining: 51.4s
40:	learn: 0.7082621	test: 0.7031222	best: 0.7031222 (40)	total: 6.11s	remaining: 50.5s
50:	learn: 0.7261671	test: 0.7169313	best: 0.7169313 (50)	total: 7.67s	remaining: 49.5s
60:	learn: 0.7348006	test: 0.7235273	best: 0.7235273 (60)	total: 9.19s	remaining: 48s
70:	learn: 0.7382246	test: 0.7241529	best: 0.7243347 (66)	total: 10.7s	remaining: 46.6s
80:	learn: 0.7432836	test: 0.7278105	best: 0.7278105 (80)	total: 12.2s	remaining: 45.1s
90:	learn: 0.7464185	test: 0.7294106	best: 0.7296453 (85)	total: 13.7s	remaining: 43.5s
100:	learn: 0.7478922	test: 0.7304128	best: 0.7304863 (98)	total: 15.2s	remaining: 42.1s
110:	learn: 0.7510823	test: 0.7305096	best: 0.7305096 (110)	total: 16.8s	remaining: 40.6s
120:	learn: 0.7527803	test: 0.7302905	best: 0.7307623 (114)	total: 18.3s	remaining: 39.2s
130:	learn: 0.7553444	test: 0.7308158	best: 0.7310733 (127)	total: 19.8s	remaining: 37.7s
140:	learn: 0.7566465	test: 0.7316563	best: 0.7317172 (139)	total: 21.2s	remaining: 35.9s
150:	learn: 0.7584276	test: 0.7324299	best: 0.7324299 (150)	total: 22.6s	remaining: 34.3s
160:	learn: 0.7592962	test: 0.7314688	best: 0.7324422 (151)	total: 24s	remaining: 32.7s
170:	learn: 0.7601477	test: 0.7319155	best: 0.7324422 (151)	total: 25.4s	remaining: 31s
180:	learn: 0.7612467	test: 0.7319940	best: 0.7324422 (151)	total: 26.8s	remaining: 29.5s
190:	learn: 0.7632065	test: 0.7328493	best: 0.7328743 (188)	total: 28.3s	remaining: 28s
200:	learn: 0.7639552	test: 0.7324753	best: 0.7328743 (188)	total: 29.7s	remaining: 26.4s
210:	learn: 0.7643762	test: 0.7323072	best: 0.7328743 (188)	total: 31.1s	remaining: 24.9s
220:	learn: 0.7646495	test: 0.7323158	best: 0.7328743 (188)	total: 32.5s	remaining: 23.4s
230:	learn: 0.7657368	test: 0.7325968	best: 0.7328743 (188)	total: 33.9s	remaining: 21.9s
240:	learn: 0.7666927	test: 0.7328572	best: 0.7329827 (233)	total: 35.4s	remaining: 20.4s
250:	learn: 0.7685509	test: 0.7336479	best: 0.7336479 (250)	total: 36.8s	remaining: 18.9s
260:	learn: 0.7699310	test: 0.7340272	best: 0.7340552 (254)	total: 38.3s	remaining: 17.4s
270:	learn: 0.7704172	test: 0.7337635	best: 0.7340648 (263)	total: 39.6s	remaining: 15.9s
280:	learn: 0.7713839	test: 0.7342292	best: 0.7343264 (273)	total: 41s	remaining: 14.4s
290:	learn: 0.7737579	test: 0.7357105	best: 0.7357105 (290)	total: 42.4s	remaining: 13s
300:	learn: 0.7742167	test: 0.7355983	best: 0.7357115 (291)	total: 43.8s	remaining: 11.5s
310:	learn: 0.7751320	test: 0.7352556	best: 0.7357115 (291)	total: 45.2s	remaining: 10s
320:	learn: 0.7759741	test: 0.7350259	best: 0.7357115 (291)	total: 46.5s	remaining: 8.55s
330:	learn: 0.7775387	test: 0.7360985	best: 0.7361001 (329)	total: 48s	remaining: 7.11s
340:	learn: 0.7779773	test: 0.7358010	best: 0.7361641 (331)	total: 49.4s	remaining: 5.64s
350:	learn: 0.7791976	test: 0.7360960	best: 0.7361832 (345)	total: 50.7s	remaining: 4.19s
360:	learn: 0.7798114	test: 0.7358528	best: 0.7361832 (345)	total: 52.2s	remaining: 2.75s
370:	learn: 0.7809396	test: 0.7356589	best: 0.7361832 (345)	total: 53.6s	remaining: 1.3s
379:	learn: 0.7819140	test: 0.7355784	best: 0.7361832 (345)	total: 54.8s	remaining: 0us
bestTest = 0.7361832261
bestIteration = 345
Shrink model to first 346 iterations.
ROC AUC: 0.7361831380410303
PRC AUC: 0.2058638779325551
              precision    recall  f1-score   support

          -1       0.93      1.00      0.96     11107
           1       0.57      0.01      0.02       893

    accuracy                           0.93     12000
   macro avg       0.75      0.50      0.49     12000
weighted avg       0.90      0.93      0.89     12000

------------------
------------------
In [24]:
# Rebuild the two-stage feature pipeline on `features` and on `test_data`.
# Stage 1 uses max_feats=60, the same cap under which good_columns1 was
# derived from the training split; columns beyond that cap would be dropped by
# the good_columns1 selection below anyway.
data0 = cat_prep(normed_fe_interaction(features[good_columns0],level=2, max_feats = 60,num_columns=num_columns0),cat_columns0)

data_test0 = cat_prep(normed_fe_interaction(test_data[good_columns0],level=2, max_feats = 60,num_columns=num_columns0),cat_columns0)

# Stage 2: interactions on top of the selected stage-1 columns.
data1 = cat_prep(normed_fe_interaction(data0[good_columns1],level=5, max_feats = 150,num_columns=num_columns1),cat_columns1)
data_test1 = cat_prep(normed_fe_interaction(data_test0[good_columns1],level=5, max_feats = 150,num_columns=num_columns1),cat_columns1)


# Left as the last expression so the notebook displays the resulting column index.
data1.columns
Out[24]:
Index(['Var6', 'Var13', 'Var21', 'Var22', 'Var24', 'Var25', 'Var28', 'Var38',
       'Var57', 'Var73',
       ...
       'Var6,Var13,Var21,Var24,Var6,Var160_mix_mix',
       'Var6,Var13,Var21,Var24,Var6,Var163_mix_mix',
       'Var6,Var13,Var21,Var24,Var6,Var189_mix_mix',
       'Var6,Var13,Var21,Var24,Var13,Var21_mix_mix',
       'Var6,Var13,Var21,Var24,Var13,Var22_mix_mix',
       'Var6,Var13,Var21,Var24,Var13,Var24_mix_mix',
       'Var6,Var13,Var21,Var24,Var13,Var25_mix_mix',
       'Var6,Var13,Var21,Var24,Var13,Var28_mix_mix',
       'Var6,Var13,Var21,Var24,Var13,Var38_mix_mix',
       'Var6,Var13,Var21,Var24,Var13,Var57_mix_mix'],
      dtype='object', length=282)
In [26]:
# Fit on the two-stage features of `data1`/`labels` (no eval_set) and score
# the matching test frame.
cb_settings = {
    'task_type': 'GPU',
    'iterations': 380,
    'random_state': 0,
    'eval_metric': 'AUC',
    'learning_rate': 0.08,
    'boosting_type': 'Ordered',
    'bootstrap_type': 'Bernoulli',
    'subsample': 0.8,
    'one_hot_max_size': 10,
    'leaf_estimation_iterations': 10,
    'max_ctr_complexity': 4,
}
estimator_cb = CatBoostClassifier(**cb_settings)

estimator_cb.fit(data1, labels, cat_features=cat_columns1, verbose=10, plot=True)

# The second predict_proba column is what gets written to the submission file.
probs = estimator_cb.predict_proba(data_test1)
write_to_submission_file(probs[:, 1], out_file='submission_cb_new_fe7.csv')
0:	learn: 0.5451564	total: 184ms	remaining: 1m 9s
10:	learn: 0.5725576	total: 1.58s	remaining: 53.1s
20:	learn: 0.5805818	total: 2.85s	remaining: 48.6s
30:	learn: 0.6456250	total: 4.25s	remaining: 47.8s
40:	learn: 0.6933237	total: 5.72s	remaining: 47.3s
50:	learn: 0.7176251	total: 7.26s	remaining: 46.8s
60:	learn: 0.7294395	total: 8.76s	remaining: 45.8s
70:	learn: 0.7340938	total: 10.2s	remaining: 44.5s
80:	learn: 0.7382719	total: 11.7s	remaining: 43s
90:	learn: 0.7408038	total: 13.1s	remaining: 41.6s
100:	learn: 0.7453128	total: 14.6s	remaining: 40.3s
110:	learn: 0.7475374	total: 16.1s	remaining: 39s
120:	learn: 0.7486774	total: 17.6s	remaining: 37.7s
130:	learn: 0.7511468	total: 19s	remaining: 36.1s
140:	learn: 0.7526319	total: 20.4s	remaining: 34.6s
150:	learn: 0.7539047	total: 21.8s	remaining: 33.1s
160:	learn: 0.7547536	total: 23.3s	remaining: 31.7s
170:	learn: 0.7556646	total: 24.7s	remaining: 30.2s
180:	learn: 0.7577817	total: 26.1s	remaining: 28.7s
190:	learn: 0.7582746	total: 27.5s	remaining: 27.2s
200:	learn: 0.7588360	total: 28.9s	remaining: 25.7s
210:	learn: 0.7601500	total: 30.3s	remaining: 24.3s
220:	learn: 0.7614768	total: 31.8s	remaining: 22.9s
230:	learn: 0.7622847	total: 33.2s	remaining: 21.4s
240:	learn: 0.7635843	total: 34.6s	remaining: 19.9s
250:	learn: 0.7645206	total: 36s	remaining: 18.5s
260:	learn: 0.7651803	total: 37.4s	remaining: 17.1s
270:	learn: 0.7659303	total: 38.8s	remaining: 15.6s
280:	learn: 0.7671121	total: 40.2s	remaining: 14.2s
290:	learn: 0.7680102	total: 41.6s	remaining: 12.7s
300:	learn: 0.7684099	total: 43s	remaining: 11.3s
310:	learn: 0.7692982	total: 44.4s	remaining: 9.86s
320:	learn: 0.7700073	total: 45.8s	remaining: 8.41s
330:	learn: 0.7708914	total: 47.2s	remaining: 6.98s
340:	learn: 0.7718505	total: 48.7s	remaining: 5.56s
350:	learn: 0.7721811	total: 49.9s	remaining: 4.12s
360:	learn: 0.7727009	total: 51.2s	remaining: 2.69s
370:	learn: 0.7730901	total: 52.4s	remaining: 1.27s
379:	learn: 0.7737397	total: 53.7s	remaining: 0us

image.png

In [19]:
def normed_fe_interaction_2_3_4_5(dataset, max_feats = 10,num_columns=None, stats_source=None):
  """Append interaction features built from standardized numeric columns.

  For every order k in (2, 3, 4, 5) the first ``max_feats`` combinations of
  ``num_columns`` (in ``itertools.combinations`` order) each yield one new
  column: the sum of all pairwise products of the z-scored member columns.
  The new column is named ``'<col1>,<col2>,..._mix'``.

  Args:
    dataset: input DataFrame; it is left unmodified.
    max_feats: non-negative cap on the number of combinations used per
      order (``None`` means no cap).
    num_columns: names of the columns to combine. ``None`` or an empty
      list adds no features.
    stats_source: optional DataFrame whose columns provide the mean and
      standard deviation used for z-scoring (for example the training
      frame when transforming validation or test data). When omitted, the
      statistics come from ``dataset`` itself.

  Returns:
    A new DataFrame with the columns of ``dataset`` followed by the
    generated ``*_mix`` columns.
  """
  num_columns = [] if num_columns is None else list(num_columns)
  dataset = dataset.copy()
  data = dataset[num_columns]
  stats = data if stats_source is None else stats_source[num_columns]

  # Each column is standardized at most once and only when a selected
  # combination actually needs it.
  normed = {}

  def zscore(col):
    if col not in normed:
      normed[col] = (data[col] - stats[col].mean())/stats[col].std()
    return normed[col]

  fe_list = []
  for order in (2, 3, 4, 5):
    # islice takes the leading combinations without materializing all of them.
    combos = itertools.islice(itertools.combinations(data.columns, order), max_feats)
    for comb in combos:
      feature = None
      for left, right in itertools.combinations(comb, 2):
        product = zscore(left) * zscore(right)
        feature = product if feature is None else feature + product
      # `feature` is always a freshly created Series, so renaming it does not
      # touch the cached z-scores.
      feature.name = ','.join(str(col) for col in comb) + '_mix'
      fe_list.append(feature)

  if not fe_list:
    return dataset
  return pd.concat([dataset] + fe_list, axis=1)
In [34]:
# Validation run with the 2- to 5-way interaction features (100 per order).
cb_settings = {
    'task_type': 'GPU',
    'iterations': 380,
    'random_state': 0,
    'eval_metric': 'AUC',
    'learning_rate': 0.08,
    'boosting_type': 'Ordered',
    'bootstrap_type': 'Bernoulli',
    'subsample': 0.8,
    'one_hot_max_size': 10,
    'leaf_estimation_iterations': 10,
    'max_ctr_complexity': 4,
}
estimator_cb = CatBoostClassifier(**cb_settings)

# NOTE(review): each call below z-scores with the statistics of the frame it
# receives, so the train and validation splits are scaled independently.
fe_settings = {'max_feats': 100, 'num_columns': num_columns}
data_train = cat_prep(
    normed_fe_interaction_2_3_4_5(feats_train[good_columns], **fe_settings),
    cat_columns,
)
data_val = cat_prep(
    normed_fe_interaction_2_3_4_5(feats_val[good_columns], **fe_settings),
    cat_columns,
)

estimator_cb.fit(
    data_train,
    labels_train,
    cat_features=cat_columns,
    verbose=10,
    plot=True,
    eval_set=(data_val, labels_val),
)

# Ranking metrics use the second predict_proba column.
prb = estimator_cb.predict_proba(data_val)
print(f'ROC AUC: {roc_auc_score(labels_val.values,prb[:,1])}')
print(f'PRC AUC: {average_precision_score(labels_val.values,prb[:,1])}')

# Hard-label report from the model's own predict().
pred = estimator_cb.predict(data_val)
print(classification_report(labels_val, pred))
print('------------------')
print('------------------')

pr_plot(
    labels_val.values,
    prb,
    [0.05, 0.1, 0.2, 0.25, 0.3, 0.35, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.96, 0.97, 0.99],
)
0:	learn: 0.5555654	test: 0.5696517	best: 0.5696517 (0)	total: 215ms	remaining: 1m 21s
10:	learn: 0.5713860	test: 0.5765586	best: 0.5794135 (8)	total: 1.72s	remaining: 57.7s
20:	learn: 0.5861074	test: 0.5806159	best: 0.5806159 (20)	total: 3.35s	remaining: 57.3s
30:	learn: 0.6734353	test: 0.6672239	best: 0.6672239 (30)	total: 4.99s	remaining: 56.2s
40:	learn: 0.7155750	test: 0.7025345	best: 0.7028905 (39)	total: 6.73s	remaining: 55.7s
50:	learn: 0.7241156	test: 0.7102279	best: 0.7103319 (49)	total: 8.45s	remaining: 54.5s
60:	learn: 0.7336637	test: 0.7190003	best: 0.7190003 (60)	total: 10.3s	remaining: 53.9s
70:	learn: 0.7421160	test: 0.7243975	best: 0.7243975 (70)	total: 12s	remaining: 52.3s
80:	learn: 0.7472532	test: 0.7267382	best: 0.7267382 (80)	total: 13.8s	remaining: 50.9s
90:	learn: 0.7514526	test: 0.7300600	best: 0.7300600 (90)	total: 15.5s	remaining: 49.3s
100:	learn: 0.7528829	test: 0.7305117	best: 0.7305117 (100)	total: 17.3s	remaining: 47.7s
110:	learn: 0.7558333	test: 0.7301660	best: 0.7305934 (101)	total: 19.1s	remaining: 46.2s
120:	learn: 0.7586639	test: 0.7310759	best: 0.7314202 (116)	total: 20.8s	remaining: 44.5s
130:	learn: 0.7606825	test: 0.7322044	best: 0.7322044 (130)	total: 22.5s	remaining: 42.7s
140:	learn: 0.7622013	test: 0.7327795	best: 0.7327795 (140)	total: 24.1s	remaining: 40.8s
150:	learn: 0.7631042	test: 0.7324426	best: 0.7327795 (140)	total: 25.7s	remaining: 38.9s
160:	learn: 0.7644778	test: 0.7324788	best: 0.7327795 (140)	total: 27.3s	remaining: 37.2s
170:	learn: 0.7656200	test: 0.7323565	best: 0.7327795 (140)	total: 29s	remaining: 35.5s
180:	learn: 0.7666491	test: 0.7322071	best: 0.7327795 (140)	total: 30.6s	remaining: 33.7s
190:	learn: 0.7681058	test: 0.7320407	best: 0.7327795 (140)	total: 32.3s	remaining: 31.9s
200:	learn: 0.7693438	test: 0.7318354	best: 0.7327795 (140)	total: 33.9s	remaining: 30.1s
210:	learn: 0.7707496	test: 0.7323174	best: 0.7327795 (140)	total: 35.5s	remaining: 28.4s
220:	learn: 0.7722266	test: 0.7328783	best: 0.7328783 (220)	total: 37.1s	remaining: 26.7s
230:	learn: 0.7726614	test: 0.7326182	best: 0.7329575 (224)	total: 38.7s	remaining: 25s
240:	learn: 0.7738179	test: 0.7324210	best: 0.7329575 (224)	total: 40.4s	remaining: 23.3s
250:	learn: 0.7745325	test: 0.7325225	best: 0.7329575 (224)	total: 42s	remaining: 21.6s
260:	learn: 0.7754441	test: 0.7328376	best: 0.7329575 (224)	total: 43.6s	remaining: 19.9s
270:	learn: 0.7764252	test: 0.7323447	best: 0.7329575 (224)	total: 45.2s	remaining: 18.2s
280:	learn: 0.7781309	test: 0.7329403	best: 0.7329575 (224)	total: 46.8s	remaining: 16.5s
290:	learn: 0.7797206	test: 0.7327039	best: 0.7329575 (224)	total: 48.5s	remaining: 14.8s
300:	learn: 0.7805549	test: 0.7325470	best: 0.7329575 (224)	total: 50.1s	remaining: 13.1s
310:	learn: 0.7817929	test: 0.7322364	best: 0.7329575 (224)	total: 51.7s	remaining: 11.5s
320:	learn: 0.7824789	test: 0.7322575	best: 0.7329575 (224)	total: 53.3s	remaining: 9.8s
330:	learn: 0.7834846	test: 0.7321977	best: 0.7329575 (224)	total: 55s	remaining: 8.13s
340:	learn: 0.7836409	test: 0.7320223	best: 0.7329575 (224)	total: 56.3s	remaining: 6.44s
350:	learn: 0.7850824	test: 0.7325168	best: 0.7329575 (224)	total: 57.8s	remaining: 4.78s
360:	learn: 0.7857445	test: 0.7322787	best: 0.7329575 (224)	total: 59.4s	remaining: 3.12s
370:	learn: 0.7866741	test: 0.7327437	best: 0.7329575 (224)	total: 1m	remaining: 1.48s
379:	learn: 0.7871770	test: 0.7324616	best: 0.7329575 (224)	total: 1m 2s	remaining: 0us
bestTest = 0.7329575419
bestIteration = 224
Shrink model to first 225 iterations.
ROC AUC: 0.7329576669011431
PRC AUC: 0.201955393506825
              precision    recall  f1-score   support

          -1       0.93      1.00      0.96     11107
           1       0.44      0.01      0.02       893

    accuracy                           0.93     12000
   macro avg       0.68      0.50      0.49     12000
weighted avg       0.89      0.93      0.89     12000

------------------
------------------
In [35]:
# Fit on `features`/`labels` (no eval_set) with 100 interaction features per
# order, then score the test frame.
cb_settings = {
    'task_type': 'GPU',
    'iterations': 380,
    'random_state': 0,
    'eval_metric': 'AUC',
    'learning_rate': 0.08,
    'boosting_type': 'Ordered',
    'bootstrap_type': 'Bernoulli',
    'subsample': 0.8,
    'one_hot_max_size': 10,
    'leaf_estimation_iterations': 10,
    'max_ctr_complexity': 4,
}
estimator_cb = CatBoostClassifier(**cb_settings)

# NOTE(review): each call below z-scores with the statistics of the frame it
# receives, so the train and test frames are scaled independently.
fe_settings = {'max_feats': 100, 'num_columns': num_columns}
data = cat_prep(
    normed_fe_interaction_2_3_4_5(features[good_columns], **fe_settings),
    cat_columns,
)
data_test = cat_prep(
    normed_fe_interaction_2_3_4_5(test_data[good_columns], **fe_settings),
    cat_columns,
)

estimator_cb.fit(data, labels, cat_features=cat_columns, verbose=10, plot=True)

# The second predict_proba column is what gets written to the submission file.
probs = estimator_cb.predict_proba(data_test)
write_to_submission_file(probs[:, 1], out_file='submission_cb_new_fe8.csv')
0:	learn: 0.5765340	total: 221ms	remaining: 1m 23s
10:	learn: 0.5768688	total: 1.81s	remaining: 1m
20:	learn: 0.5865380	total: 3.3s	remaining: 56.5s
30:	learn: 0.6394052	total: 4.8s	remaining: 54.1s
40:	learn: 0.6967446	total: 6.49s	remaining: 53.7s
50:	learn: 0.7134748	total: 8.16s	remaining: 52.6s
60:	learn: 0.7281194	total: 9.95s	remaining: 52.1s
70:	learn: 0.7341011	total: 11.6s	remaining: 50.6s
80:	learn: 0.7391862	total: 13.3s	remaining: 49.1s
90:	learn: 0.7425467	total: 15s	remaining: 47.6s
100:	learn: 0.7461727	total: 16.6s	remaining: 45.9s
110:	learn: 0.7490683	total: 18.4s	remaining: 44.5s
120:	learn: 0.7504365	total: 20s	remaining: 42.9s
130:	learn: 0.7521587	total: 21.7s	remaining: 41.2s
140:	learn: 0.7532998	total: 23.4s	remaining: 39.6s
150:	learn: 0.7543221	total: 25.1s	remaining: 38.1s
160:	learn: 0.7553627	total: 26.7s	remaining: 36.3s
170:	learn: 0.7563469	total: 28.3s	remaining: 34.6s
180:	learn: 0.7572771	total: 29.9s	remaining: 32.9s
190:	learn: 0.7581932	total: 31.6s	remaining: 31.3s
200:	learn: 0.7592375	total: 33.2s	remaining: 29.6s
210:	learn: 0.7605810	total: 34.8s	remaining: 27.9s
220:	learn: 0.7616123	total: 36.4s	remaining: 26.2s
230:	learn: 0.7634794	total: 38.1s	remaining: 24.6s
240:	learn: 0.7643655	total: 39.7s	remaining: 22.9s
250:	learn: 0.7653859	total: 41.3s	remaining: 21.3s
260:	learn: 0.7662902	total: 43s	remaining: 19.6s
270:	learn: 0.7672104	total: 44.6s	remaining: 17.9s
280:	learn: 0.7676175	total: 46.2s	remaining: 16.3s
290:	learn: 0.7677737	total: 47.6s	remaining: 14.6s
300:	learn: 0.7680412	total: 49.1s	remaining: 12.9s
310:	learn: 0.7689114	total: 50.8s	remaining: 11.3s
320:	learn: 0.7698671	total: 52.4s	remaining: 9.63s
330:	learn: 0.7706643	total: 54s	remaining: 7.99s
340:	learn: 0.7711630	total: 55.5s	remaining: 6.34s
350:	learn: 0.7718726	total: 57.1s	remaining: 4.71s
360:	learn: 0.7727455	total: 58.7s	remaining: 3.09s
370:	learn: 0.7730926	total: 1m	remaining: 1.46s
379:	learn: 0.7734246	total: 1m 1s	remaining: 0us
In [36]:
# Variant with fewer rounds (350) and 50 interaction features per order.
cb_settings = {
    'task_type': 'GPU',
    'iterations': 350,
    'random_state': 0,
    'eval_metric': 'AUC',
    'learning_rate': 0.08,
    'boosting_type': 'Ordered',
    'bootstrap_type': 'Bernoulli',
    'subsample': 0.8,
    'one_hot_max_size': 10,
    'leaf_estimation_iterations': 10,
    'max_ctr_complexity': 4,
}
estimator_cb = CatBoostClassifier(**cb_settings)

# NOTE(review): each call below z-scores with the statistics of the frame it
# receives, so the train and test frames are scaled independently.
fe_settings = {'max_feats': 50, 'num_columns': num_columns}
data = cat_prep(
    normed_fe_interaction_2_3_4_5(features[good_columns], **fe_settings),
    cat_columns,
)
data_test = cat_prep(
    normed_fe_interaction_2_3_4_5(test_data[good_columns], **fe_settings),
    cat_columns,
)

estimator_cb.fit(data, labels, cat_features=cat_columns, verbose=10, plot=True)

# The second predict_proba column is what gets written to the submission file.
probs = estimator_cb.predict_proba(data_test)
write_to_submission_file(probs[:, 1], out_file='submission_cb_new_fe9.csv')
0:	learn: 0.5667429	total: 177ms	remaining: 1m 1s
10:	learn: 0.5737682	total: 1.59s	remaining: 49.2s
20:	learn: 0.5810381	total: 2.9s	remaining: 45.5s
30:	learn: 0.6431125	total: 4.29s	remaining: 44.1s
40:	learn: 0.7041263	total: 5.81s	remaining: 43.8s
50:	learn: 0.7161064	total: 7.28s	remaining: 42.7s
60:	learn: 0.7247359	total: 8.78s	remaining: 41.6s
70:	learn: 0.7323563	total: 10.3s	remaining: 40.3s
80:	learn: 0.7385996	total: 11.8s	remaining: 39.1s
90:	learn: 0.7431650	total: 13.3s	remaining: 37.8s
100:	learn: 0.7470099	total: 14.8s	remaining: 36.4s
110:	learn: 0.7488531	total: 16.3s	remaining: 35.1s
120:	learn: 0.7507526	total: 17.8s	remaining: 33.6s
130:	learn: 0.7536470	total: 19.3s	remaining: 32.3s
140:	learn: 0.7545087	total: 20.8s	remaining: 30.8s
150:	learn: 0.7558425	total: 22.2s	remaining: 29.2s
160:	learn: 0.7575546	total: 23.5s	remaining: 27.6s
170:	learn: 0.7581857	total: 25s	remaining: 26.2s
180:	learn: 0.7587169	total: 26.4s	remaining: 24.6s
190:	learn: 0.7599850	total: 27.9s	remaining: 23.2s
200:	learn: 0.7606665	total: 29.3s	remaining: 21.7s
210:	learn: 0.7626397	total: 30.8s	remaining: 20.3s
220:	learn: 0.7634836	total: 32.2s	remaining: 18.8s
230:	learn: 0.7643421	total: 33.6s	remaining: 17.3s
240:	learn: 0.7649774	total: 35s	remaining: 15.8s
250:	learn: 0.7656030	total: 36.5s	remaining: 14.4s
260:	learn: 0.7664486	total: 37.9s	remaining: 12.9s
270:	learn: 0.7672578	total: 39.4s	remaining: 11.5s
280:	learn: 0.7676688	total: 40.9s	remaining: 10s
290:	learn: 0.7683996	total: 42.5s	remaining: 8.61s
300:	learn: 0.7686475	total: 44s	remaining: 7.16s
310:	learn: 0.7691620	total: 45.5s	remaining: 5.7s
320:	learn: 0.7692952	total: 46.9s	remaining: 4.24s
330:	learn: 0.7701636	total: 48.4s	remaining: 2.78s
340:	learn: 0.7707786	total: 49.8s	remaining: 1.31s
349:	learn: 0.7712512	total: 51s	remaining: 0us
In [37]:
# Variant with more rounds (450) and 200 interaction features per order.
cb_settings = {
    'task_type': 'GPU',
    'iterations': 450,
    'random_state': 0,
    'eval_metric': 'AUC',
    'learning_rate': 0.08,
    'boosting_type': 'Ordered',
    'bootstrap_type': 'Bernoulli',
    'subsample': 0.8,
    'one_hot_max_size': 10,
    'leaf_estimation_iterations': 10,
    'max_ctr_complexity': 4,
}
estimator_cb = CatBoostClassifier(**cb_settings)

# NOTE(review): each call below z-scores with the statistics of the frame it
# receives, so the train and test frames are scaled independently.
fe_settings = {'max_feats': 200, 'num_columns': num_columns}
data = cat_prep(
    normed_fe_interaction_2_3_4_5(features[good_columns], **fe_settings),
    cat_columns,
)
data_test = cat_prep(
    normed_fe_interaction_2_3_4_5(test_data[good_columns], **fe_settings),
    cat_columns,
)

estimator_cb.fit(data, labels, cat_features=cat_columns, verbose=10, plot=True)

# The second predict_proba column is what gets written to the submission file.
probs = estimator_cb.predict_proba(data_test)
write_to_submission_file(probs[:, 1], out_file='submission_cb_new_fe10.csv')
0:	learn: 0.5737893	total: 277ms	remaining: 2m 4s
10:	learn: 0.5749083	total: 2.23s	remaining: 1m 29s
20:	learn: 0.5837294	total: 4.12s	remaining: 1m 24s
30:	learn: 0.6687396	total: 6.08s	remaining: 1m 22s
40:	learn: 0.6992503	total: 8.18s	remaining: 1m 21s
50:	learn: 0.7118391	total: 10.2s	remaining: 1m 19s
60:	learn: 0.7230413	total: 12.3s	remaining: 1m 18s
70:	learn: 0.7301466	total: 14.4s	remaining: 1m 16s
80:	learn: 0.7353533	total: 16.4s	remaining: 1m 14s
90:	learn: 0.7388830	total: 18.5s	remaining: 1m 12s
100:	learn: 0.7424933	total: 20.6s	remaining: 1m 11s
110:	learn: 0.7446024	total: 22.8s	remaining: 1m 9s
120:	learn: 0.7474078	total: 24.9s	remaining: 1m 7s
130:	learn: 0.7506405	total: 27.1s	remaining: 1m 6s
140:	learn: 0.7527396	total: 29.3s	remaining: 1m 4s
150:	learn: 0.7547552	total: 31.3s	remaining: 1m 2s
160:	learn: 0.7556532	total: 33.3s	remaining: 59.7s
170:	learn: 0.7568748	total: 35.2s	remaining: 57.4s
180:	learn: 0.7577535	total: 37s	remaining: 55s
190:	learn: 0.7592937	total: 39s	remaining: 52.9s
200:	learn: 0.7614064	total: 41.1s	remaining: 50.9s
210:	learn: 0.7627760	total: 43.1s	remaining: 48.9s
220:	learn: 0.7636509	total: 45.1s	remaining: 46.7s
230:	learn: 0.7645048	total: 47.1s	remaining: 44.7s
240:	learn: 0.7649277	total: 49s	remaining: 42.5s
250:	learn: 0.7658036	total: 51s	remaining: 40.4s
260:	learn: 0.7668829	total: 53.1s	remaining: 38.5s
270:	learn: 0.7676768	total: 55.2s	remaining: 36.5s
280:	learn: 0.7679174	total: 57.1s	remaining: 34.3s
290:	learn: 0.7687054	total: 59.1s	remaining: 32.3s
300:	learn: 0.7691593	total: 1m	remaining: 30.1s
310:	learn: 0.7701891	total: 1m 2s	remaining: 28.1s
320:	learn: 0.7712246	total: 1m 4s	remaining: 26s
330:	learn: 0.7720993	total: 1m 6s	remaining: 24s
340:	learn: 0.7731484	total: 1m 8s	remaining: 22s
350:	learn: 0.7743510	total: 1m 11s	remaining: 20s
360:	learn: 0.7759219	total: 1m 13s	remaining: 18s
370:	learn: 0.7767734	total: 1m 15s	remaining: 16s
380:	learn: 0.7776394	total: 1m 17s	remaining: 14s
390:	learn: 0.7781210	total: 1m 18s	remaining: 11.9s
400:	learn: 0.7787829	total: 1m 20s	remaining: 9.89s
410:	learn: 0.7797469	total: 1m 22s	remaining: 7.87s
420:	learn: 0.7807559	total: 1m 24s	remaining: 5.85s
430:	learn: 0.7820039	total: 1m 27s	remaining: 3.84s
440:	learn: 0.7827273	total: 1m 29s	remaining: 1.82s
449:	learn: 0.7833245	total: 1m 30s	remaining: 0us

image.png

In [38]:
# Blend earlier submissions (including previous blends): read each CSV, place
# them side by side and write the row-wise mean as a new submission.
sub_names = [
    'submission_cb_new_fe1',
    'submission_mean3',
    'submission_mean4',
    'submission_mean5',
    'submission_cb_new_fe6',
    'submission_mean6',
    'submission_mean7',
    'submission_mean8',
    'submission_mean9',
    'submission_cb_new_fe8',
    'submission_cb_new_fe9',
]
submission_list = [
    pd.read_csv(f'{submission_name}.csv', index_col=0)
    for submission_name in sub_names
]
sub_df = pd.concat(submission_list, axis=1)
# The mean is taken before the 'mean' column exists, so it covers only the inputs.
sub_df['mean'] = sub_df.mean(axis=1)
write_to_submission_file(sub_df[['mean']].values, out_file='submission_mean10.csv')

image.png

In [20]:
def categ_fe_interaction(dataset, max_feats = 10,cat_columns=None, sep=''):
  """Add pairwise interaction features built from categorical columns.

  For each of the first `max_feats` pairs produced by
  `itertools.combinations(cat_columns, 2)`, a new column named
  '<left>,<right>_mix' is created by concatenating the string
  representations of the two source columns.

  Args:
    dataset: input DataFrame; it is copied and never modified.
    max_feats: maximum number of pair features to create (a non-negative
      int, or None for all pairs).
    cat_columns: names of the categorical columns to combine. Any iterable
      of column names is accepted; None means no columns.
    sep: string inserted between the two values. The default '' keeps the
      historical output. NOTE: with an empty separator different value
      pairs can collide (e.g. '1'+'11' and '11'+'1'); pass a separator that
      cannot occur in the data to avoid that.

  Returns:
    (res_dataframe, cat_columns_new): the copy of `dataset` with the new
    columns appended, and a new list holding the original categorical
    column names followed by the generated ones.
  """
  # Copy into a fresh list: avoids the shared mutable default and makes the
  # list concatenation below safe for tuples / pd.Index inputs.
  cat_columns = [] if cat_columns is None else list(cat_columns)

  dataset = dataset.copy()
  data = dataset[cat_columns]

  # islice takes only the pairs that are needed instead of materialising
  # every combination first.
  pairs = itertools.islice(itertools.combinations(data.columns, 2), max_feats)

  fe_list = []
  for left, right in pairs:
    feature = data[left].astype(str) + sep + data[right].astype(str)
    feature.name = f'{left},{right}_mix'
    fe_list.append(feature)

  if fe_list:
    features = pd.concat(fe_list,axis=1)
  else:
    # Keep the original index so the concat below cannot change the rows.
    features = pd.DataFrame(index=dataset.index)
  res_dataframe = pd.concat([dataset,features],axis=1)

  cat_columns_new = cat_columns + list(features.columns)

  return res_dataframe,cat_columns_new
In [21]:
def metric_estimate_prediction(model,data_val,labels_val,cat_columns):
  """Return the ROC AUC of `model` on the given validation data.

  The score is computed from the second column of
  `model.predict_proba(data_val)` against `labels_val.values`.
  `cat_columns` is accepted for interface compatibility and is not used.
  """
  positive_class_scores = model.predict_proba(data_val)[:,1]
  return roc_auc_score(labels_val.values,positive_class_scores)




def feature_selection_loop_prediction(model,data_val,labels_val,cat_columns,threshold=1e-4,random_state=None):
  """Screen features by permutation on a validation set.

  Each column of `data_val` is shuffled in turn, the already fitted `model`
  is re-scored with `metric_estimate_prediction`, and the change relative to
  the unshuffled score is recorded. A column is reported as "good" when
  shuffling it lowers the metric by more than `threshold * metric_start`.

  Args:
    model: fitted estimator passed through to `metric_estimate_prediction`.
    data_val: validation features (DataFrame); the caller's object is not
      modified.
    labels_val: validation labels.
    cat_columns: names of categorical columns; only used to sort the good
      columns into the categorical or the numeric result list.
    threshold: relative drop of the metric required to keep a column.
    random_state: optional seed for reproducible shuffles. None (default)
      uses NumPy's global random state, as before.

  Returns:
    (good_num_columns, good_cat_columns, diff_metric_without_fe) where the
    last item maps every column name to `permuted_metric - baseline_metric`.
  """
  # Imported locally so the function does not depend on a later notebook
  # cell having executed `import gc`.
  import gc

  rng = np.random if random_state is None else np.random.RandomState(random_state)

  # Work on a private copy: columns are permuted in place and restored, which
  # avoids copying the whole frame once per feature.
  data_val = data_val.copy()
  cat_column_set = set(cat_columns)

  metric_start = metric_estimate_prediction(model,data_val,labels_val,cat_columns)
  good_cat_columns = []
  good_num_columns = []
  diff_metric_without_fe = {}
  for col in tqdm_notebook(data_val.columns):

    original_column = data_val[col].copy()
    data_val[col] = rng.permutation(original_column.values)
    try:
      metric_fe_out = metric_estimate_prediction(model,data_val,labels_val,cat_columns)
    finally:
      # Always put the real values back before the next column is tested.
      data_val[col] = original_column

    diff_metric = metric_fe_out - metric_start 
    diff_metric_without_fe[col] = diff_metric
    
    print('----------')   
    print(f'{col} diff_metric: {diff_metric}')
    print('----------')

    # Negative diff means the score dropped when the column was shuffled.
    if diff_metric < -threshold*metric_start:

      if col in cat_column_set:
        good_cat_columns.append(col)
        print('----------')
        print(f'good_cat_columns: {good_cat_columns}')

      else:
        good_num_columns.append(col)
        print('----------')
        print(f'good_num_columns: {good_num_columns}')

    gc.collect()

  return good_num_columns,good_cat_columns,diff_metric_without_fe
In [ ]:
 
In [40]:
# Build the train / validation matrices from the selected columns.
# normed_fe_interaction and cat_prep are defined outside this excerpt.
# Step 1: numeric interaction features (level=2, at most 100 new features).
data_plus_new_num_train = normed_fe_interaction(feats_train[good_columns],level=2, max_feats = 100,num_columns=num_columns)
data_plus_new_num_val = normed_fe_interaction(feats_val[good_columns],level=2, max_feats = 100,num_columns=num_columns)

# Step 2: pairwise categorical "_mix" features. The extended list of
# categorical column names is taken from the train call; for validation only
# the DataFrame (element [0] of the returned tuple) is kept.
data_plus_num_cat_train,cat_columns_new = categ_fe_interaction(data_plus_new_num_train, max_feats = 100,cat_columns=cat_columns)
data_plus_num_cat_val = categ_fe_interaction(data_plus_new_num_val, max_feats = 100,cat_columns=cat_columns)[0]

# Step 3: cat_prep over all categorical columns (original + generated).
# NOTE(review): presumably prepares them for CatBoost - confirm in cat_prep.
data_train = cat_prep(data_plus_num_cat_train,cat_columns_new)
data_val = cat_prep(data_plus_num_cat_val,cat_columns_new)

data_train.head()
Out[40]:
Var6 Var13 Var21 Var22 Var24 Var25 Var28 Var38 Var57 Var73 Var74 Var76 Var81 Var83 Var85 Var94 Var109 Var112 Var113 Var119 Var123 Var125 Var126 Var133 Var134 Var140 Var149 Var153 Var160 Var163 Var189 Var7 Var35 Var44 Var65 Var72 Var78 Var132 Var143 Var144 ... Var35,Var221_mix Var35,Var223_mix Var35,Var225_mix Var35,Var226_mix Var35,Var227_mix Var35,Var229_mix Var35,Var192_mix Var35,Var193_mix Var35,Var197_mix Var35,Var198_mix Var35,Var199_mix Var35,Var202_mix Var35,Var204_mix Var35,Var212_mix Var35,Var216_mix Var35,Var217_mix Var35,Var220_mix Var35,Var222_mix Var35,Var228_mix Var44,Var65_mix Var44,Var72_mix Var44,Var78_mix Var44,Var132_mix Var44,Var143_mix Var44,Var144_mix Var44,Var173_mix Var44,Var181_mix Var44,Var195_mix Var44,Var196_mix Var44,Var203_mix Var44,Var205_mix Var44,Var206_mix Var44,Var207_mix Var44,Var208_mix Var44,Var210_mix Var44,Var211_mix Var44,Var218_mix Var44,Var219_mix Var44,Var221_mix Var44,Var223_mix
6892 NaN NaN NaN NaN NaN NaN NaN NaN 0.657125 8 NaN NaN NaN NaN NaN NaN NaN NaN -1395772.00 NaN NaN NaN -18.0 NaN NaN NaN NaN NaN NaN NaN NaN nan nan nan nan nan nan nan nan nan ... nanoslk nanLM8l689qOp nannan nanQu4f nanRAYp nannan nanDHeTmBftjz nanRO12 nan7gSz nan8ij6Lg8 nanLJF4fPp nanBcur nanZ5OU nanNhsEn4L nan7WwCtIM nan1GbF nan7OmVzos nan76DJixu nanF2FyR07IdsN7I nannan nannan nannan nannan nannan nannan nannan nannan nantaul nan1K8T nan9_Y1 nanVpdQ nannan nanme75fM6ugJ nankIsH nanuKAI nanMtgm nancJvF nanAU8pNoi nanoslk nanLM8l689qOp
34821 NaN NaN NaN NaN NaN NaN NaN NaN 0.117069 10 NaN NaN NaN NaN NaN NaN NaN NaN 390151.60 NaN NaN NaN 4.0 NaN NaN NaN NaN NaN NaN NaN NaN nan nan nan nan nan nan nan nan nan ... nanoslk nannan nannan nanw_Ub nanRAYp nannan nan2jigUH7ejg nanRO12 nandm89 nanLG0vbUP nann1zVHpT8NN nan5FzM nanDmlN nanNhsEn4L nanmAja5EA nanFJ56cYO nansE0uLpj nanG9maF5M nanF2FyR07IdsN7I nannan nannan nannan nannan nannan nannan nannan nannan nantaul nan1K8T nan9_Y1 nan09_Q nannan nanme75fM6ugJ nankIsH nanuKAI nanL84s nancJvF nannan nanoslk nannan
34190 98.0 0.0 152.0 190.0 0.0 64.0 86.96 4107204.0 2.271523 14 0.0 691200.0 147468.60 25.0 10.0 252417.0 24.0 8.0 -150260.80 315.0 66.0 0.0 -26.0 1585040.0 728196.0 0.0 0.0 8437160.0 16.0 1426026.0 NaN 0.0 5.0 0.0 18.0 6.0 0.0 8.0 0.0 0.0 ... 5.0oslk 5.0LM8l689qOp 5.0nan 5.0Qu4f 5.0RAYp 5.0nan 5.0zcRZptzip9 5.0RO12 5.0USOt 5.0pro8v8X 5.0CsjH_hi 5.0rUBc 5.0t_4G 5.0NhsEn4L 5.0mAjDcoz 5.0xYrN 5.0meWVy8V 5.0DQ3u3MC 5.0F2FyR07IdsN7I 0.018.0 0.06.0 0.00.0 0.08.0 0.00.0 0.00.0 0.00.0 0.00.0 0.0taul 0.01K8T 0.09_Y1 0.0VpdQ 0.0IYzP 0.0me75fM6ugJ 0.0sBgB 0.0uKAI 0.0L84s 0.0cJvF 0.0FzaX 0.0oslk 0.0LM8l689qOp
24541 938.0 520.0 148.0 185.0 2.0 96.0 186.64 749586.0 0.714591 160 77.0 2013720.0 100805.70 5.0 10.0 168123.0 48.0 32.0 32282.84 600.0 30.0 68571.0 NaN 3873950.0 2044820.0 4160.0 604800.0 10517440.0 20.0 2592000.0 NaN 7.0 0.0 0.0 18.0 6.0 0.0 0.0 0.0 18.0 ... 0.0zCkv 0.0jySVZNlOJy 0.0ELof 0.0WqMG 0.0ZI9m 0.0mj86 0.0mzKvyx8zhV 0.02Knk1KF 0.0vSNn 0.0fhk21Ss 0.0Hz673939hSRjL 0.0W9XQ 0.0QMes 0.0Ie_5MZs 0.0XTbjhEX 0.0F6F0 0.04UxGlow 0.0catzS2D 0.0TCU50_Yjmm6GIBZ0lL_ 0.018.0 0.06.0 0.00.0 0.00.0 0.00.0 0.018.0 0.00.0 0.00.0 0.0taul 0.01K8T 0.09_Y1 0.0VpdQ 0.0wMei 0.07M47J5GA0pTYIFxg5uy 0.0kIsH 0.0uKAI 0.0L84s 0.0cJvF 0.0FzaX 0.0zCkv 0.0jySVZNlOJy
31483 602.0 88.0 0.0 0.0 NaN 0.0 166.56 0.0 3.250160 30 0.0 0.0 6388.71 0.0 0.0 1194.0 NaN 0.0 81176.80 100.0 0.0 9657.0 4.0 0.0 0.0 385.0 NaN 0.0 0.0 0.0 NaN 7.0 0.0 0.0 9.0 3.0 0.0 0.0 0.0 0.0 ... 0.0oslk 0.0LM8l689qOp 0.0nan 0.0FSa2 0.0RAYp 0.0nan 0.0639qrQK2Mx 0.0RO12 0.0AHgj 0.0creg0bq 0.05q1hF23 0.06Yf9 0.015m3 0.0NhsEn4L 0.0mAjbk_S 0.0oLcf 0.0VgKv48t 0.0nRgz4Af 0.0F2FyR07IdsN7I 0.09.0 0.03.0 0.00.0 0.00.0 0.00.0 0.00.0 0.00.0 0.00.0 0.0taul 0.01K8T 0.09_Y1 0.0sJzTlal 0.0IYzP 0.0me75fM6ugJ 0.0kIsH 0.0uKAI 0.0L84s 0.0cJvF 0.0FzaX 0.0oslk 0.0LM8l689qOp

5 rows × 272 columns

In [42]:
# Train CatBoost on the train split with the validation split as eval_set,
# then report validation metrics.
estimator_cb = CatBoostClassifier(task_type='GPU',iterations=380,random_state=0,
                                  eval_metric = 'AUC',
                                  learning_rate=0.08,
                                  boosting_type = 'Ordered',
                                  bootstrap_type='Bernoulli',
                                  subsample=0.8,
                                  one_hot_max_size=10,                                  
                                  leaf_estimation_iterations=10,
                                  max_ctr_complexity=4
                                  )


# verbose=10 logs every 10th iteration. With an eval_set the log below ends
# with the model being shrunk to the best validation iteration.
estimator_cb.fit(data_train, labels_train, cat_features = cat_columns_new,verbose=10, plot=True,
                 eval_set = (data_val, labels_val))
prb = estimator_cb.predict_proba(data_val)                                                                          
                                                                
# Ranking metrics use the second probability column.
print(f'ROC AUC: {roc_auc_score(labels_val.values,prb[:,1])}')    
print(f'PRC AUC: {average_precision_score(labels_val.values,prb[:,1])}')   
                                                                        
# Hard-label report from the model's own predict().
pred = estimator_cb.predict(data_val)
print(classification_report(labels_val,pred))  
print('------------------')
print('------------------')

# pr_plot is defined outside this excerpt; the list is passed as its third
# argument. NOTE(review): presumably probability thresholds - confirm.
pr_plot(labels_val.values,prb,[0.05,0.1,0.2,0.25,0.3,0.35,0.4,0.5,0.6,0.7,0.8,0.9,0.95,0.96,0.97,0.99])
0:	learn: 0.5430850	test: 0.5344066	best: 0.5344066 (0)	total: 350ms	remaining: 2m 12s
10:	learn: 0.5712767	test: 0.5797211	best: 0.5805307 (6)	total: 3.25s	remaining: 1m 48s
20:	learn: 0.5772293	test: 0.5859278	best: 0.5866229 (17)	total: 6.15s	remaining: 1m 45s
30:	learn: 0.6685320	test: 0.6664833	best: 0.6664833 (30)	total: 8.99s	remaining: 1m 41s
40:	learn: 0.7082075	test: 0.7016789	best: 0.7016789 (40)	total: 12.3s	remaining: 1m 41s
50:	learn: 0.7215861	test: 0.7131334	best: 0.7131334 (50)	total: 15.7s	remaining: 1m 41s
60:	learn: 0.7276231	test: 0.7164901	best: 0.7169411 (58)	total: 19.1s	remaining: 1m 39s
70:	learn: 0.7357169	test: 0.7223550	best: 0.7223550 (70)	total: 22.4s	remaining: 1m 37s
80:	learn: 0.7402230	test: 0.7237323	best: 0.7237323 (80)	total: 25.8s	remaining: 1m 35s
90:	learn: 0.7438513	test: 0.7261963	best: 0.7261963 (90)	total: 28.9s	remaining: 1m 31s
100:	learn: 0.7475068	test: 0.7266419	best: 0.7266419 (100)	total: 32.2s	remaining: 1m 28s
110:	learn: 0.7502545	test: 0.7269056	best: 0.7269056 (110)	total: 35.5s	remaining: 1m 26s
120:	learn: 0.7529143	test: 0.7268614	best: 0.7270021 (118)	total: 38.6s	remaining: 1m 22s
130:	learn: 0.7538267	test: 0.7269072	best: 0.7272654 (124)	total: 41.6s	remaining: 1m 19s
140:	learn: 0.7555699	test: 0.7273316	best: 0.7273316 (140)	total: 44.9s	remaining: 1m 16s
150:	learn: 0.7587590	test: 0.7293946	best: 0.7293946 (150)	total: 48s	remaining: 1m 12s
160:	learn: 0.7603617	test: 0.7293204	best: 0.7294953 (152)	total: 51.1s	remaining: 1m 9s
170:	learn: 0.7616912	test: 0.7296443	best: 0.7297097 (168)	total: 54.1s	remaining: 1m 6s
180:	learn: 0.7630305	test: 0.7300402	best: 0.7300402 (180)	total: 57.2s	remaining: 1m 2s
190:	learn: 0.7646331	test: 0.7298221	best: 0.7300402 (180)	total: 1m	remaining: 59.7s
200:	learn: 0.7674114	test: 0.7312775	best: 0.7312775 (200)	total: 1m 3s	remaining: 56.6s
210:	learn: 0.7679805	test: 0.7314438	best: 0.7314438 (210)	total: 1m 6s	remaining: 53.2s
220:	learn: 0.7690201	test: 0.7315281	best: 0.7315442 (218)	total: 1m 9s	remaining: 50s
230:	learn: 0.7698592	test: 0.7312800	best: 0.7315442 (218)	total: 1m 12s	remaining: 46.8s
240:	learn: 0.7712882	test: 0.7316744	best: 0.7317991 (236)	total: 1m 15s	remaining: 43.7s
250:	learn: 0.7722530	test: 0.7312591	best: 0.7317991 (236)	total: 1m 18s	remaining: 40.5s
260:	learn: 0.7734852	test: 0.7311968	best: 0.7317991 (236)	total: 1m 21s	remaining: 37.3s
270:	learn: 0.7748757	test: 0.7312898	best: 0.7318253 (263)	total: 1m 24s	remaining: 34.2s
280:	learn: 0.7773530	test: 0.7338365	best: 0.7338365 (280)	total: 1m 28s	remaining: 31s
290:	learn: 0.7781997	test: 0.7346498	best: 0.7346733 (289)	total: 1m 31s	remaining: 27.9s
300:	learn: 0.7799236	test: 0.7349263	best: 0.7350927 (295)	total: 1m 34s	remaining: 24.8s
310:	learn: 0.7812293	test: 0.7353945	best: 0.7357199 (308)	total: 1m 37s	remaining: 21.7s
320:	learn: 0.7826680	test: 0.7354013	best: 0.7357199 (308)	total: 1m 40s	remaining: 18.5s
330:	learn: 0.7838538	test: 0.7353581	best: 0.7357199 (308)	total: 1m 43s	remaining: 15.4s
340:	learn: 0.7851864	test: 0.7349252	best: 0.7357199 (308)	total: 1m 46s	remaining: 12.2s
350:	learn: 0.7862698	test: 0.7348866	best: 0.7357199 (308)	total: 1m 49s	remaining: 9.07s
360:	learn: 0.7865057	test: 0.7348942	best: 0.7357199 (308)	total: 1m 52s	remaining: 5.93s
370:	learn: 0.7871310	test: 0.7350078	best: 0.7357199 (308)	total: 1m 55s	remaining: 2.81s
379:	learn: 0.7878544	test: 0.7347130	best: 0.7357199 (308)	total: 1m 58s	remaining: 0us
bestTest = 0.7357199192
bestIteration = 308
Shrink model to first 309 iterations.
ROC AUC: 0.7357198647262085
PRC AUC: 0.21167490513104
              precision    recall  f1-score   support

          -1       0.93      1.00      0.96     11107
           1       0.50      0.01      0.03       893

    accuracy                           0.93     12000
   macro avg       0.71      0.51      0.49     12000
weighted avg       0.89      0.93      0.89     12000

------------------
------------------
In [43]:
# Same feature pipeline as for the train/validation split, applied to the
# `features` and `test_data` frames (both defined outside this excerpt).
# normed_fe_interaction and cat_prep are also defined elsewhere.
data_plus_new_num = normed_fe_interaction(features[good_columns],level=2, max_feats = 100,num_columns=num_columns)
data_plus_new_num_test = normed_fe_interaction(test_data[good_columns],level=2, max_feats = 100,num_columns=num_columns)

# cat_columns_new is re-assigned here from the `features` call; for the test
# data only the DataFrame (element [0]) is kept.
data_plus_num_cat,cat_columns_new = categ_fe_interaction(data_plus_new_num, max_feats = 100,cat_columns=cat_columns)
data_plus_num_cat_test = categ_fe_interaction(data_plus_new_num_test, max_feats = 100,cat_columns=cat_columns)[0]

# cat_prep over all categorical columns (original + generated).
data = cat_prep(data_plus_num_cat,cat_columns_new)
data_test = cat_prep(data_plus_num_cat_test,cat_columns_new)

data.head()
Out[43]:
Var6 Var13 Var21 Var22 Var24 Var25 Var28 Var38 Var57 Var73 Var74 Var76 Var81 Var83 Var85 Var94 Var109 Var112 Var113 Var119 Var123 Var125 Var126 Var133 Var134 Var140 Var149 Var153 Var160 Var163 Var189 Var7 Var35 Var44 Var65 Var72 Var78 Var132 Var143 Var144 ... Var35,Var221_mix Var35,Var223_mix Var35,Var225_mix Var35,Var226_mix Var35,Var227_mix Var35,Var229_mix Var35,Var192_mix Var35,Var193_mix Var35,Var197_mix Var35,Var198_mix Var35,Var199_mix Var35,Var202_mix Var35,Var204_mix Var35,Var212_mix Var35,Var216_mix Var35,Var217_mix Var35,Var220_mix Var35,Var222_mix Var35,Var228_mix Var44,Var65_mix Var44,Var72_mix Var44,Var78_mix Var44,Var132_mix Var44,Var143_mix Var44,Var144_mix Var44,Var173_mix Var44,Var181_mix Var44,Var195_mix Var44,Var196_mix Var44,Var203_mix Var44,Var205_mix Var44,Var206_mix Var44,Var207_mix Var44,Var208_mix Var44,Var210_mix Var44,Var211_mix Var44,Var218_mix Var44,Var219_mix Var44,Var221_mix Var44,Var223_mix
0 3052.0 NaN 480.0 600.0 20.0 480.0 200.00 82752.0 2.907926 34 NaN 716008.0 14599.92 5.0 32.0 NaN 144.0 144.0 -1209960.0 1660.0 66.0 NaN 4.0 326915.0 604276.0 NaN 389396.0 2313888.0 28.0 599532.0 NaN nan 0.0 0.0 nan nan 0.0 0.0 0.0 9.0 ... 0.0Al6ZaUT 0.0LM8l689qOp 0.0nan 0.0fKCe 0.002N6s8f 0.0nan 0.0NESt0G8EIb 0.0AERks4l 0.00LaQ 0.0UaKK0yW 0.0I1sFbv_0IT 0.0EkHG 0.0k13i 0.0JBfYVit4g8 0.0TDctq2l 0.0KmRo 0.0hLKtJ9p 0.0vr93T2a 0.0xwM2aC7IdeMC0 0.0nan 0.0nan 0.00.0 0.00.0 0.00.0 0.09.0 0.00.0 0.00.0 0.0taul 0.01K8T 0.09_Y1 0.009_Q 0.0IYzP 0.0GjJ35utlTa_GNSvxxpb9ju 0.0kIsH 0.0uKAI 0.0L84s 0.0cJvF 0.0FzaX 0.0Al6ZaUT 0.0LM8l689qOp
1 1813.0 636.0 212.0 265.0 2.0 128.0 166.56 2706120.0 5.870327 128 0.0 1661128.0 67529.09 25.0 10.0 32289.0 80.0 72.0 417932.0 1025.0 66.0 24912.0 40.0 1934460.0 349568.0 205.0 735.0 6502680.0 14.0 364182.0 276.0 7.0 0.0 0.0 27.0 3.0 0.0 0.0 0.0 18.0 ... 0.0oslk 0.0LM8l689qOp 0.0ELof 0.0xb3V 0.0RAYp 0.0mj86 0.0P1WvyxLp3Z 0.02Knk1KF 0.0YFAj 0.0Bnunsla 0.0o64y9zI 0.0JDd6 0.0FbIm 0.0XfqtO3UdzaXh_ 0.0XTbqizz 0.0qMoY 0.0hN8KpA1 0.06hQ9lNX 0.055YFVY9 0.027.0 0.03.0 0.00.0 0.00.0 0.00.0 0.018.0 0.00.0 0.00.0 0.0taul 0.01K8T 0.09_Y1 0.0VpdQ 0.0haYg 0.0me75fM6ugJ 0.0kIsH 0.0uKAI 0.0L84s 0.0cJvF 0.0FzaX 0.0oslk 0.0LM8l689qOp
2 1953.0 448.0 176.0 220.0 0.0 72.0 311.76 4698780.0 5.981628 166 245.0 3025152.0 85266.00 35.0 0.0 53388.0 40.0 48.0 -124655.2 590.0 78.0 7218.0 36.0 3148410.0 1086210.0 400.0 0.0 10569040.0 18.0 0.0 NaN 7.0 0.0 0.0 18.0 3.0 0.0 0.0 0.0 27.0 ... 0.0zCkv 0.0LM8l689qOp 0.0nan 0.0FSa2 0.0ZI9m 0.0mj86 0.0FoxgUHSK8h 0.0LrdZy8QqgUfkVShG 0.0TyGl 0.0fhk21Ss 0.0nQUveAzAF7 0.0dnwD 0.0mTeA 0.04kVnq_T26xq1p 0.0pMWBUmQ 0.0qLXr 0.04UxGlow 0.0catzS2D 0.0ib5G6X1eUxUn6 0.018.0 0.03.0 0.00.0 0.00.0 0.00.0 0.027.0 0.00.0 0.00.0 0.0taul 0.01K8T 0.09_Y1 0.0VpdQ 0.0hAFG 0.07M47J5GA0pTYIFxg5uy 0.0kIsH 0.0uKAI 0.0L84s 0.0UYBR 0.0FzaX 0.0zCkv 0.0LM8l689qOp
3 1533.0 4.0 332.0 415.0 0.0 144.0 220.08 864384.0 5.108097 30 0.0 2642240.0 74107.20 10.0 2.0 NaN 32.0 32.0 378473.6 1435.0 24.0 693.0 NaN 7066700.0 650390.0 5.0 0.0 9676200.0 108.0 253284.0 NaN 7.0 5.0 0.0 9.0 nan 0.0 8.0 0.0 0.0 ... 5.0oslk 5.0LM8l689qOp 5.0nan 5.0xb3V 5.0RAYp 5.0nan 5.0vNEvyxLp3Z 5.0RO12 5.00Xwj 5.0uoZk2Zj 5.0LWyxgtXeJL 5.0CwmB 5.0vzJD 5.0NhsEn4L 5.0kZJtVhC 5.0JC0e 5.0ylCK5YS 5.0e4lqvY0 5.0F2FyR07IdsN7I 0.09.0 0.0nan 0.00.0 0.08.0 0.00.0 0.00.0 0.00.0 0.00.0 0.0taul 0.01K8T 0.0F3hy 0.0VpdQ 0.0IYzP 0.0me75fM6ugJ 0.0kIsH 0.0uKAI 0.0L84s 0.0cJvF 0.0FzaX 0.0oslk 0.0LM8l689qOp
4 686.0 0.0 160.0 200.0 2.0 48.0 278.00 4364880.0 0.650716 32 0.0 1440.0 171072.90 25.0 12.0 106455.0 32.0 8.0 142602.4 490.0 60.0 468.0 -28.0 3794460.0 642816.0 225.0 554414.0 10535200.0 24.0 2851284.0 NaN 7.0 0.0 0.0 9.0 3.0 0.0 0.0 0.0 9.0 ... 0.0oslk 0.0LM8l689qOp 0.0nan 0.0WqMG 0.0RAYp 0.0nan 0.04e7gUH7IEC 0.0RO12 0.0vSNn 0.0kugYdIL 0.0ZIXKpoNpqq 0.0625Z 0.0m_h1 0.0NhsEn4L 0.0NGZXfGp 0.0064o 0.0PYpzAu9 0.0MAz3HNj 0.0F2FyR07IdsN7I 0.09.0 0.03.0 0.00.0 0.00.0 0.00.0 0.09.0 0.00.0 0.00.0 0.0taul 0.01K8T 0.09_Y1 0.0sJzTlal 0.0zm5i 0.0me75fM6ugJ 0.0kIsH 0.0uKAI 0.0L84s 0.0cJvF 0.0FzaX 0.0oslk 0.0LM8l689qOp

5 rows × 272 columns

In [45]:
# Final model: same hyper-parameters as the validation run, fitted on
# `data` / `labels` without an eval_set, then used to score the test data.
estimator_cb = CatBoostClassifier(task_type='GPU',iterations=380,random_state=0,
                                  eval_metric = 'AUC',
                                  learning_rate=0.08,
                                  boosting_type = 'Ordered',
                                  bootstrap_type='Bernoulli',
                                  subsample=0.8,
                                  one_hot_max_size=10,                                  
                                  leaf_estimation_iterations=10,
                                  max_ctr_complexity=4
                                  )
 

# No eval_set is passed, so all 380 iterations are used (no shrinking to a
# best iteration).
estimator_cb.fit(data, labels, cat_features = cat_columns_new,verbose=10, plot=True)
              
# The second probability column is written to the submission file;
# write_to_submission_file is defined outside this excerpt.
probs = estimator_cb.predict_proba(data_test)
write_to_submission_file(probs[:,1],out_file='submission_cb_new_fe_final1.csv')
0:	learn: 0.5663509	total: 295ms	remaining: 1m 51s
10:	learn: 0.5834329	total: 3.1s	remaining: 1m 43s
20:	learn: 0.5870177	total: 5.97s	remaining: 1m 42s
30:	learn: 0.6619694	total: 9.03s	remaining: 1m 41s
40:	learn: 0.6969404	total: 12.2s	remaining: 1m 41s
50:	learn: 0.7083168	total: 15.5s	remaining: 1m 40s
60:	learn: 0.7177288	total: 18.8s	remaining: 1m 38s
70:	learn: 0.7245206	total: 21.9s	remaining: 1m 35s
80:	learn: 0.7308877	total: 25.2s	remaining: 1m 33s
90:	learn: 0.7358207	total: 28.6s	remaining: 1m 30s
100:	learn: 0.7391955	total: 31.8s	remaining: 1m 27s
110:	learn: 0.7421244	total: 35.1s	remaining: 1m 24s
120:	learn: 0.7462179	total: 38.3s	remaining: 1m 22s
130:	learn: 0.7485546	total: 41.5s	remaining: 1m 18s
140:	learn: 0.7507052	total: 44.7s	remaining: 1m 15s
150:	learn: 0.7524530	total: 47.7s	remaining: 1m 12s
160:	learn: 0.7532978	total: 50.7s	remaining: 1m 8s
170:	learn: 0.7545121	total: 53.8s	remaining: 1m 5s
180:	learn: 0.7555951	total: 56.7s	remaining: 1m 2s
190:	learn: 0.7560489	total: 59.6s	remaining: 59s
200:	learn: 0.7565040	total: 1m 2s	remaining: 55.8s
210:	learn: 0.7586303	total: 1m 5s	remaining: 52.7s
220:	learn: 0.7590261	total: 1m 8s	remaining: 49.4s
230:	learn: 0.7603382	total: 1m 11s	remaining: 46.2s
240:	learn: 0.7609506	total: 1m 14s	remaining: 43.1s
250:	learn: 0.7620266	total: 1m 17s	remaining: 39.9s
260:	learn: 0.7634740	total: 1m 20s	remaining: 36.8s
270:	learn: 0.7642303	total: 1m 23s	remaining: 33.7s
280:	learn: 0.7651455	total: 1m 26s	remaining: 30.6s
290:	learn: 0.7660416	total: 1m 29s	remaining: 27.5s
300:	learn: 0.7667048	total: 1m 32s	remaining: 24.4s
310:	learn: 0.7684523	total: 1m 36s	remaining: 21.3s
320:	learn: 0.7693343	total: 1m 39s	remaining: 18.2s
330:	learn: 0.7699862	total: 1m 42s	remaining: 15.1s
340:	learn: 0.7703937	total: 1m 44s	remaining: 12s
350:	learn: 0.7710688	total: 1m 48s	remaining: 8.93s
360:	learn: 0.7717406	total: 1m 51s	remaining: 5.84s
370:	learn: 0.7722687	total: 1m 53s	remaining: 2.76s
379:	learn: 0.7725327	total: 1m 56s	remaining: 0us

image.png

In [48]:
 
In [47]:
# Rebuild the train / validation matrices with a different numeric feature
# generator, normed_fe_interaction_2_3_4_5 (defined outside this excerpt).
# NOTE(review): the name suggests interaction levels 2-5 - confirm.
data_plus_new_num_train = normed_fe_interaction_2_3_4_5(feats_train[good_columns],max_feats = 100,num_columns=num_columns)
data_plus_new_num_val = normed_fe_interaction_2_3_4_5(feats_val[good_columns],max_feats = 100,num_columns=num_columns)

# Pairwise categorical "_mix" features. The extended categorical column list
# comes from the train call; validation keeps only the DataFrame ([0]).
data_plus_num_cat_train,cat_columns_new = categ_fe_interaction(data_plus_new_num_train, max_feats = 100,cat_columns=cat_columns)
data_plus_num_cat_val = categ_fe_interaction(data_plus_new_num_val, max_feats = 100,cat_columns=cat_columns)[0]

# cat_prep (defined elsewhere) over all categorical columns.
data_train = cat_prep(data_plus_num_cat_train,cat_columns_new)
data_val = cat_prep(data_plus_num_cat_val,cat_columns_new)

data_train.head()
Out[47]:
Var6 Var13 Var21 Var22 Var24 Var25 Var28 Var38 Var57 Var73 Var74 Var76 Var81 Var83 Var85 Var94 Var109 Var112 Var113 Var119 Var123 Var125 Var126 Var133 Var134 Var140 Var149 Var153 Var160 Var163 Var189 Var7 Var35 Var44 Var65 Var72 Var78 Var132 Var143 Var144 ... Var35,Var221_mix Var35,Var223_mix Var35,Var225_mix Var35,Var226_mix Var35,Var227_mix Var35,Var229_mix Var35,Var192_mix Var35,Var193_mix Var35,Var197_mix Var35,Var198_mix Var35,Var199_mix Var35,Var202_mix Var35,Var204_mix Var35,Var212_mix Var35,Var216_mix Var35,Var217_mix Var35,Var220_mix Var35,Var222_mix Var35,Var228_mix Var44,Var65_mix Var44,Var72_mix Var44,Var78_mix Var44,Var132_mix Var44,Var143_mix Var44,Var144_mix Var44,Var173_mix Var44,Var181_mix Var44,Var195_mix Var44,Var196_mix Var44,Var203_mix Var44,Var205_mix Var44,Var206_mix Var44,Var207_mix Var44,Var208_mix Var44,Var210_mix Var44,Var211_mix Var44,Var218_mix Var44,Var219_mix Var44,Var221_mix Var44,Var223_mix
6892 NaN NaN NaN NaN NaN NaN NaN NaN 0.657125 8 NaN NaN NaN NaN NaN NaN NaN NaN -1395772.00 NaN NaN NaN -18.0 NaN NaN NaN NaN NaN NaN NaN NaN nan nan nan nan nan nan nan nan nan ... nanoslk nanLM8l689qOp nannan nanQu4f nanRAYp nannan nanDHeTmBftjz nanRO12 nan7gSz nan8ij6Lg8 nanLJF4fPp nanBcur nanZ5OU nanNhsEn4L nan7WwCtIM nan1GbF nan7OmVzos nan76DJixu nanF2FyR07IdsN7I nannan nannan nannan nannan nannan nannan nannan nannan nantaul nan1K8T nan9_Y1 nanVpdQ nannan nanme75fM6ugJ nankIsH nanuKAI nanMtgm nancJvF nanAU8pNoi nanoslk nanLM8l689qOp
34821 NaN NaN NaN NaN NaN NaN NaN NaN 0.117069 10 NaN NaN NaN NaN NaN NaN NaN NaN 390151.60 NaN NaN NaN 4.0 NaN NaN NaN NaN NaN NaN NaN NaN nan nan nan nan nan nan nan nan nan ... nanoslk nannan nannan nanw_Ub nanRAYp nannan nan2jigUH7ejg nanRO12 nandm89 nanLG0vbUP nann1zVHpT8NN nan5FzM nanDmlN nanNhsEn4L nanmAja5EA nanFJ56cYO nansE0uLpj nanG9maF5M nanF2FyR07IdsN7I nannan nannan nannan nannan nannan nannan nannan nannan nantaul nan1K8T nan9_Y1 nan09_Q nannan nanme75fM6ugJ nankIsH nanuKAI nanL84s nancJvF nannan nanoslk nannan
34190 98.0 0.0 152.0 190.0 0.0 64.0 86.96 4107204.0 2.271523 14 0.0 691200.0 147468.60 25.0 10.0 252417.0 24.0 8.0 -150260.80 315.0 66.0 0.0 -26.0 1585040.0 728196.0 0.0 0.0 8437160.0 16.0 1426026.0 NaN 0.0 5.0 0.0 18.0 6.0 0.0 8.0 0.0 0.0 ... 5.0oslk 5.0LM8l689qOp 5.0nan 5.0Qu4f 5.0RAYp 5.0nan 5.0zcRZptzip9 5.0RO12 5.0USOt 5.0pro8v8X 5.0CsjH_hi 5.0rUBc 5.0t_4G 5.0NhsEn4L 5.0mAjDcoz 5.0xYrN 5.0meWVy8V 5.0DQ3u3MC 5.0F2FyR07IdsN7I 0.018.0 0.06.0 0.00.0 0.08.0 0.00.0 0.00.0 0.00.0 0.00.0 0.0taul 0.01K8T 0.09_Y1 0.0VpdQ 0.0IYzP 0.0me75fM6ugJ 0.0sBgB 0.0uKAI 0.0L84s 0.0cJvF 0.0FzaX 0.0oslk 0.0LM8l689qOp
24541 938.0 520.0 148.0 185.0 2.0 96.0 186.64 749586.0 0.714591 160 77.0 2013720.0 100805.70 5.0 10.0 168123.0 48.0 32.0 32282.84 600.0 30.0 68571.0 NaN 3873950.0 2044820.0 4160.0 604800.0 10517440.0 20.0 2592000.0 NaN 7.0 0.0 0.0 18.0 6.0 0.0 0.0 0.0 18.0 ... 0.0zCkv 0.0jySVZNlOJy 0.0ELof 0.0WqMG 0.0ZI9m 0.0mj86 0.0mzKvyx8zhV 0.02Knk1KF 0.0vSNn 0.0fhk21Ss 0.0Hz673939hSRjL 0.0W9XQ 0.0QMes 0.0Ie_5MZs 0.0XTbjhEX 0.0F6F0 0.04UxGlow 0.0catzS2D 0.0TCU50_Yjmm6GIBZ0lL_ 0.018.0 0.06.0 0.00.0 0.00.0 0.00.0 0.018.0 0.00.0 0.00.0 0.0taul 0.01K8T 0.09_Y1 0.0VpdQ 0.0wMei 0.07M47J5GA0pTYIFxg5uy 0.0kIsH 0.0uKAI 0.0L84s 0.0cJvF 0.0FzaX 0.0zCkv 0.0jySVZNlOJy
31483 602.0 88.0 0.0 0.0 NaN 0.0 166.56 0.0 3.250160 30 0.0 0.0 6388.71 0.0 0.0 1194.0 NaN 0.0 81176.80 100.0 0.0 9657.0 4.0 0.0 0.0 385.0 NaN 0.0 0.0 0.0 NaN 7.0 0.0 0.0 9.0 3.0 0.0 0.0 0.0 0.0 ... 0.0oslk 0.0LM8l689qOp 0.0nan 0.0FSa2 0.0RAYp 0.0nan 0.0639qrQK2Mx 0.0RO12 0.0AHgj 0.0creg0bq 0.05q1hF23 0.06Yf9 0.015m3 0.0NhsEn4L 0.0mAjbk_S 0.0oLcf 0.0VgKv48t 0.0nRgz4Af 0.0F2FyR07IdsN7I 0.09.0 0.03.0 0.00.0 0.00.0 0.00.0 0.00.0 0.00.0 0.00.0 0.0taul 0.01K8T 0.09_Y1 0.0sJzTlal 0.0IYzP 0.0me75fM6ugJ 0.0kIsH 0.0uKAI 0.0L84s 0.0cJvF 0.0FzaX 0.0oslk 0.0LM8l689qOp

5 rows × 572 columns

In [50]:
# Fit CatBoost on the current train matrix with the validation split as
# eval_set; the fitted estimator_cb is used by later cells.
estimator_cb = CatBoostClassifier(task_type='GPU',iterations=380,random_state=0,
                                  eval_metric = 'AUC',
                                  learning_rate=0.08,
                                  boosting_type = 'Ordered',
                                  bootstrap_type='Bernoulli',
                                  subsample=0.8,
                                  one_hot_max_size=10,                                  
                                  leaf_estimation_iterations=10,
                                  max_ctr_complexity=4
                                  )



# verbose=10 logs every 10th iteration. With an eval_set the log below ends
# with the model being shrunk to the best validation iteration.
estimator_cb.fit(data_train, labels_train, cat_features = cat_columns_new,verbose=10, plot=True,
                 eval_set = (data_val, labels_val))
0:	learn: 0.5382635	test: 0.5467659	best: 0.5467659 (0)	total: 411ms	remaining: 2m 35s
10:	learn: 0.5693869	test: 0.5804114	best: 0.5804286 (7)	total: 3.45s	remaining: 1m 55s
20:	learn: 0.5804643	test: 0.5828246	best: 0.5833173 (18)	total: 6.7s	remaining: 1m 54s
30:	learn: 0.6618924	test: 0.6572264	best: 0.6572264 (30)	total: 10s	remaining: 1m 52s
40:	learn: 0.7029517	test: 0.6972320	best: 0.6972320 (40)	total: 13.8s	remaining: 1m 53s
50:	learn: 0.7189406	test: 0.7103220	best: 0.7103220 (50)	total: 17.4s	remaining: 1m 52s
60:	learn: 0.7278008	test: 0.7160044	best: 0.7162795 (58)	total: 21.2s	remaining: 1m 50s
70:	learn: 0.7362452	test: 0.7207040	best: 0.7207040 (70)	total: 24.8s	remaining: 1m 47s
80:	learn: 0.7411616	test: 0.7222482	best: 0.7227125 (76)	total: 28.4s	remaining: 1m 44s
90:	learn: 0.7453193	test: 0.7242687	best: 0.7242687 (90)	total: 32s	remaining: 1m 41s
100:	learn: 0.7499150	test: 0.7258942	best: 0.7258942 (100)	total: 35.7s	remaining: 1m 38s
110:	learn: 0.7521745	test: 0.7277544	best: 0.7277544 (110)	total: 39.4s	remaining: 1m 35s
120:	learn: 0.7542795	test: 0.7286828	best: 0.7287072 (119)	total: 43s	remaining: 1m 31s
130:	learn: 0.7565596	test: 0.7289954	best: 0.7291194 (129)	total: 46.5s	remaining: 1m 28s
140:	learn: 0.7581441	test: 0.7298793	best: 0.7298793 (140)	total: 49.9s	remaining: 1m 24s
150:	learn: 0.7594328	test: 0.7304604	best: 0.7304604 (150)	total: 53.2s	remaining: 1m 20s
160:	learn: 0.7624812	test: 0.7317179	best: 0.7322415 (158)	total: 56.6s	remaining: 1m 16s
170:	learn: 0.7639510	test: 0.7316326	best: 0.7322415 (158)	total: 60s	remaining: 1m 13s
180:	learn: 0.7647413	test: 0.7319674	best: 0.7322415 (158)	total: 1m 3s	remaining: 1m 9s
190:	learn: 0.7666149	test: 0.7326874	best: 0.7326874 (190)	total: 1m 6s	remaining: 1m 5s
200:	learn: 0.7692238	test: 0.7331169	best: 0.7331177 (199)	total: 1m 9s	remaining: 1m 2s
210:	learn: 0.7708616	test: 0.7338527	best: 0.7338527 (210)	total: 1m 13s	remaining: 58.9s
220:	learn: 0.7718144	test: 0.7331514	best: 0.7338527 (210)	total: 1m 16s	remaining: 55.2s
230:	learn: 0.7726108	test: 0.7328932	best: 0.7338527 (210)	total: 1m 20s	remaining: 51.6s
240:	learn: 0.7741157	test: 0.7328912	best: 0.7338527 (210)	total: 1m 23s	remaining: 48.1s
250:	learn: 0.7751436	test: 0.7326827	best: 0.7338527 (210)	total: 1m 26s	remaining: 44.6s
260:	learn: 0.7758600	test: 0.7326707	best: 0.7338527 (210)	total: 1m 29s	remaining: 41s
270:	learn: 0.7770941	test: 0.7327516	best: 0.7338527 (210)	total: 1m 33s	remaining: 37.5s
280:	learn: 0.7785065	test: 0.7323921	best: 0.7338527 (210)	total: 1m 36s	remaining: 34.1s
290:	learn: 0.7797082	test: 0.7326773	best: 0.7338527 (210)	total: 1m 40s	remaining: 30.7s
300:	learn: 0.7806645	test: 0.7329949	best: 0.7338527 (210)	total: 1m 43s	remaining: 27.2s
310:	learn: 0.7813766	test: 0.7329112	best: 0.7338527 (210)	total: 1m 47s	remaining: 23.8s
320:	learn: 0.7828681	test: 0.7330454	best: 0.7338527 (210)	total: 1m 50s	remaining: 20.3s
330:	learn: 0.7836865	test: 0.7323653	best: 0.7338527 (210)	total: 1m 54s	remaining: 16.9s
340:	learn: 0.7842430	test: 0.7326436	best: 0.7338527 (210)	total: 1m 57s	remaining: 13.4s
350:	learn: 0.7849295	test: 0.7326449	best: 0.7338527 (210)	total: 2m	remaining: 9.96s
360:	learn: 0.7861502	test: 0.7334417	best: 0.7338527 (210)	total: 2m 3s	remaining: 6.51s
370:	learn: 0.7864491	test: 0.7332253	best: 0.7338527 (210)	total: 2m 7s	remaining: 3.08s
379:	learn: 0.7869116	test: 0.7333084	best: 0.7338527 (210)	total: 2m 10s	remaining: 0us
bestTest = 0.7338526845
bestIteration = 210
Shrink model to first 211 iterations.
Out[50]:
<catboost.core.CatBoostClassifier at 0x7fe7ac39e4e0>
In [58]:
import gc
In [59]:
# Permutation-based feature screening of the fitted model on the validation
# split. The return value is not assigned to a variable here.
feature_selection_loop_prediction(estimator_cb,data_val,labels_val,cat_columns_new,threshold=1e-4)
----------
Var6 diff_metric: 0.0
----------
----------
Var13 diff_metric: -0.0004615593547887542
----------
----------
good_num_columns: ['Var13']
----------
Var21 diff_metric: 0.0
----------
----------
Var22 diff_metric: 0.0
----------
----------
Var24 diff_metric: 0.0
----------
----------
Var25 diff_metric: 0.0
----------
----------
Var28 diff_metric: 0.0
----------
----------
Var38 diff_metric: 0.0
----------
----------
Var57 diff_metric: 0.0
----------
----------
Var73 diff_metric: -0.0016938966185686688
----------
----------
good_num_columns: ['Var13', 'Var73']
----------
Var74 diff_metric: -0.0018407930755208746
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74']
----------
Var76 diff_metric: 0.0
----------
----------
Var81 diff_metric: -0.001946856955214682
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81']
----------
Var83 diff_metric: 0.0
----------
----------
Var85 diff_metric: 0.0
----------
----------
Var94 diff_metric: 0.0
----------
----------
Var109 diff_metric: 0.0
----------
----------
Var112 diff_metric: 0.0
----------
----------
Var113 diff_metric: -0.0038471345260009393
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113']
----------
Var119 diff_metric: 0.0
----------
----------
Var123 diff_metric: 0.0
----------
----------
Var125 diff_metric: 0.0
----------
----------
Var126 diff_metric: -0.03469710444600227
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126']
----------
Var133 diff_metric: 1.0787866090433695e-05
----------
----------
Var134 diff_metric: 0.0
----------
----------
Var140 diff_metric: 0.0
----------
----------
Var149 diff_metric: 0.0
----------
----------
Var153 diff_metric: 0.0
----------
----------
Var160 diff_metric: 0.0
----------
----------
Var163 diff_metric: 0.00019085449074152105
----------
----------
Var189 diff_metric: -0.004330269612970894
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var189']
----------
Var7 diff_metric: 5.010812567263212e-05
----------
----------
Var35 diff_metric: 0.0
----------
----------
Var44 diff_metric: 0.0
----------
----------
Var65 diff_metric: -1.1896899053120968e-05
----------
----------
Var72 diff_metric: 0.0
----------
----------
Var78 diff_metric: 1.7139600330562388e-05
----------
----------
Var132 diff_metric: -9.588094067369823e-05
----------
----------
good_cat_columns: ['Var132']
----------
Var143 diff_metric: 0.0
----------
----------
Var144 diff_metric: 0.0
----------
----------
Var173 diff_metric: 0.0
----------
----------
Var181 diff_metric: 0.0
----------
----------
Var195 diff_metric: 0.0
----------
----------
Var196 diff_metric: 0.0
----------
----------
Var203 diff_metric: 0.0
----------
----------
Var205 diff_metric: -0.0017716297471275233
----------
----------
good_cat_columns: ['Var132', 'Var205']
----------
Var206 diff_metric: -0.00032655979688978043
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206']
----------
Var207 diff_metric: 7.561588381110873e-06
----------
----------
Var208 diff_metric: 0.0
----------
----------
Var210 diff_metric: -0.0012698427421504732
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210']
----------
Var211 diff_metric: 0.0
----------
----------
Var218 diff_metric: 0.0
----------
----------
Var219 diff_metric: -0.00032242612857480424
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219']
----------
Var221 diff_metric: 0.0
----------
----------
Var223 diff_metric: 7.057482489081224e-06
----------
----------
Var225 diff_metric: 0.0
----------
----------
Var226 diff_metric: 9.779654306352192e-05
----------
----------
Var227 diff_metric: 0.0
----------
----------
Var229 diff_metric: 0.0
----------
----------
Var192 diff_metric: -0.0033655117567071846
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219', 'Var192']
----------
Var193 diff_metric: 0.0007001022629211651
----------
----------
Var197 diff_metric: 0.00043705980843367076
----------
----------
Var198 diff_metric: 0.00033875915947800816
----------
----------
Var199 diff_metric: -0.004948605900196523
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219', 'Var192', 'Var199']
----------
Var202 diff_metric: 7.188550021053342e-05
----------
----------
Var204 diff_metric: -8.468978986964082e-05
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204']
----------
Var212 diff_metric: 0.0002984306881115284
----------
----------
Var216 diff_metric: 7.380110260046813e-05
----------
----------
Var217 diff_metric: -0.0005289079019707987
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var217']
----------
Var220 diff_metric: -3.3573452412616334e-05
----------
----------
Var222 diff_metric: 0.00011554107046485296
----------
----------
Var228 diff_metric: -5.2729476311630386e-05
----------
----------
Var6,Var13_mix diff_metric: 0.0
----------
----------
Var6,Var21_mix diff_metric: 0.0
----------
----------
Var6,Var22_mix diff_metric: 0.0
----------
----------
Var6,Var24_mix diff_metric: 0.0
----------
----------
Var6,Var25_mix diff_metric: 0.0
----------
----------
Var6,Var28_mix diff_metric: 0.0
----------
----------
Var6,Var38_mix diff_metric: 0.0
----------
----------
Var6,Var57_mix diff_metric: 0.0
----------
----------
Var6,Var73_mix diff_metric: 0.00019317337784507949
----------
----------
Var6,Var74_mix diff_metric: 0.0
----------
----------
Var6,Var76_mix diff_metric: 0.0
----------
----------
Var6,Var81_mix diff_metric: 1.64338520818319e-05
----------
----------
Var6,Var83_mix diff_metric: 1.9055202720608122e-05
----------
----------
Var6,Var85_mix diff_metric: 0.0
----------
----------
Var6,Var94_mix diff_metric: 0.0
----------
----------
Var6,Var109_mix diff_metric: 0.0
----------
----------
Var6,Var112_mix diff_metric: 0.0
----------
----------
Var6,Var113_mix diff_metric: 4.950319860219654e-05
----------
----------
Var6,Var119_mix diff_metric: 0.0
----------
----------
Var6,Var123_mix diff_metric: 0.0
----------
----------
Var6,Var125_mix diff_metric: 0.0
----------
----------
Var6,Var126_mix diff_metric: -0.00010314006551959132
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var189', 'Var6,Var126_mix']
----------
Var6,Var133_mix diff_metric: 0.0
----------
----------
Var6,Var134_mix diff_metric: 0.0
----------
----------
Var6,Var140_mix diff_metric: 0.0
----------
----------
Var6,Var149_mix diff_metric: 0.0
----------
----------
Var6,Var153_mix diff_metric: 0.0
----------
----------
Var6,Var160_mix diff_metric: 0.0
----------
----------
Var6,Var163_mix diff_metric: 0.0
----------
----------
Var6,Var189_mix diff_metric: 0.0
----------
----------
Var13,Var21_mix diff_metric: 0.0
----------
----------
Var13,Var22_mix diff_metric: 0.0
----------
----------
Var13,Var24_mix diff_metric: 0.0
----------
----------
Var13,Var25_mix diff_metric: 0.0
----------
----------
Var13,Var28_mix diff_metric: -0.00012592565184177396
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var189', 'Var6,Var126_mix', 'Var13,Var28_mix']
----------
Var13,Var38_mix diff_metric: -8.075776391136547e-05
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var189', 'Var6,Var126_mix', 'Var13,Var28_mix', 'Var13,Var38_mix']
----------
Var13,Var57_mix diff_metric: 0.0
----------
----------
Var13,Var73_mix diff_metric: 0.0
----------
----------
Var13,Var74_mix diff_metric: 0.0
----------
----------
Var13,Var76_mix diff_metric: 0.0
----------
----------
Var13,Var81_mix diff_metric: 0.0
----------
----------
Var13,Var83_mix diff_metric: 0.0
----------
----------
Var13,Var85_mix diff_metric: 0.0
----------
----------
Var13,Var94_mix diff_metric: 0.0
----------
----------
Var13,Var109_mix diff_metric: -0.00015536543593919205
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var189', 'Var6,Var126_mix', 'Var13,Var28_mix', 'Var13,Var38_mix', 'Var13,Var109_mix']
----------
Var13,Var112_mix diff_metric: 0.0
----------
----------
Var13,Var113_mix diff_metric: 0.0
----------
----------
Var13,Var119_mix diff_metric: 0.0
----------
----------
Var13,Var123_mix diff_metric: 4.032847136237194e-07
----------
----------
Var13,Var125_mix diff_metric: 0.0
----------
----------
Var13,Var126_mix diff_metric: -0.012151270886241461
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var189', 'Var6,Var126_mix', 'Var13,Var28_mix', 'Var13,Var38_mix', 'Var13,Var109_mix', 'Var13,Var126_mix']
----------
Var13,Var133_mix diff_metric: 0.0
----------
----------
Var13,Var134_mix diff_metric: 0.0
----------
----------
Var13,Var140_mix diff_metric: 0.0
----------
----------
Var13,Var149_mix diff_metric: 0.0
----------
----------
Var13,Var153_mix diff_metric: 0.0
----------
----------
Var13,Var160_mix diff_metric: 0.0
----------
----------
Var13,Var163_mix diff_metric: 0.0
----------
----------
Var13,Var189_mix diff_metric: -0.00032595486981923383
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var189', 'Var6,Var126_mix', 'Var13,Var28_mix', 'Var13,Var38_mix', 'Var13,Var109_mix', 'Var13,Var126_mix', 'Var13,Var189_mix']
----------
Var21,Var22_mix diff_metric: 0.0
----------
----------
Var21,Var24_mix diff_metric: 0.0
----------
----------
Var21,Var25_mix diff_metric: 0.0
----------
----------
Var21,Var28_mix diff_metric: 0.0
----------
----------
Var21,Var38_mix diff_metric: 0.0
----------
----------
Var21,Var57_mix diff_metric: 0.0
----------
----------
Var21,Var73_mix diff_metric: -2.1474911002794528e-05
----------
----------
Var21,Var74_mix diff_metric: 0.0
----------
----------
Var21,Var76_mix diff_metric: 0.0
----------
----------
Var21,Var81_mix diff_metric: 0.0
----------
----------
Var21,Var83_mix diff_metric: 0.0
----------
----------
Var21,Var85_mix diff_metric: 0.0
----------
----------
Var21,Var94_mix diff_metric: 0.0
----------
----------
Var21,Var109_mix diff_metric: 0.0
----------
----------
Var21,Var112_mix diff_metric: 0.0
----------
----------
Var21,Var113_mix diff_metric: 0.0
----------
----------
Var21,Var119_mix diff_metric: 0.0
----------
----------
Var21,Var123_mix diff_metric: 0.00012784125423159765
----------
----------
Var21,Var125_mix diff_metric: 0.0
----------
----------
Var21,Var126_mix diff_metric: -0.0014468847314492717
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var189', 'Var6,Var126_mix', 'Var13,Var28_mix', 'Var13,Var38_mix', 'Var13,Var109_mix', 'Var13,Var126_mix', 'Var13,Var189_mix', 'Var21,Var126_mix']
----------
Var21,Var133_mix diff_metric: -0.00022039509601767815
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var189', 'Var6,Var126_mix', 'Var13,Var28_mix', 'Var13,Var38_mix', 'Var13,Var109_mix', 'Var13,Var126_mix', 'Var13,Var189_mix', 'Var21,Var126_mix', 'Var21,Var133_mix']
----------
Var21,Var134_mix diff_metric: 0.0
----------
----------
Var21,Var140_mix diff_metric: 1.4014143799756518e-05
----------
----------
Var21,Var149_mix diff_metric: 0.0
----------
----------
Var21,Var153_mix diff_metric: 0.0
----------
----------
Var21,Var160_mix diff_metric: 0.0003712235789279372
----------
----------
Var21,Var163_mix diff_metric: 0.0
----------
----------
Var21,Var189_mix diff_metric: 0.0
----------
----------
Var22,Var24_mix diff_metric: 0.0
----------
----------
Var22,Var25_mix diff_metric: 0.0
----------
----------
Var22,Var28_mix diff_metric: 0.0
----------
----------
Var22,Var38_mix diff_metric: 0.0
----------
----------
Var22,Var57_mix diff_metric: 0.0
----------
----------
Var22,Var73_mix diff_metric: 0.00026374820273644684
----------
----------
Var22,Var74_mix diff_metric: 0.0
----------
----------
Var22,Var76_mix diff_metric: 0.0
----------
----------
Var22,Var81_mix diff_metric: 0.0
----------
----------
Var22,Var83_mix diff_metric: 0.0
----------
----------
Var22,Var85_mix diff_metric: 0.0
----------
----------
Var22,Var94_mix diff_metric: 0.0
----------
----------
Var22,Var109_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var25_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var28_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var38_mix diff_metric: 0.00012098541409921726
----------
----------
Var6,Var13,Var57_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var73_mix diff_metric: 0.0004999722237652993
----------
----------
Var6,Var13,Var74_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var76_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var81_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var83_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var85_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var94_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var109_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var112_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var113_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var119_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var123_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var125_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var126_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var133_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var134_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var140_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var149_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var153_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var160_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var163_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var189_mix diff_metric: 0.0005369735962439393
----------
----------
Var6,Var21,Var22_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var24_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var25_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var28_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var38_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var57_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var73_mix diff_metric: 0.00010314006551959132
----------
----------
Var6,Var21,Var74_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var76_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var81_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var83_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var85_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var94_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var109_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var112_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var113_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var119_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var123_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var125_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var126_mix diff_metric: -6.150091883427855e-06
----------
----------
Var6,Var21,Var133_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var134_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var140_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var149_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var153_mix diff_metric: -0.00014356935806469906
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var189', 'Var6,Var126_mix', 'Var13,Var28_mix', 'Var13,Var38_mix', 'Var13,Var109_mix', 'Var13,Var126_mix', 'Var13,Var189_mix', 'Var21,Var126_mix', 'Var21,Var133_mix', 'Var6,Var21,Var153_mix']
----------
Var6,Var21,Var160_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var163_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var189_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var24_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var25_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var28_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var38_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var57_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var73_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var74_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var76_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var81_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var83_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var85_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var94_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var109_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var112_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var113_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var119_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var123_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var125_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var126_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var133_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var134_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var140_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var149_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var153_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var160_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var163_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var189_mix diff_metric: 0.0
----------
----------
Var6,Var24,Var25_mix diff_metric: 0.0
----------
----------
Var6,Var24,Var28_mix diff_metric: -0.000201339893296959
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var189', 'Var6,Var126_mix', 'Var13,Var28_mix', 'Var13,Var38_mix', 'Var13,Var109_mix', 'Var13,Var126_mix', 'Var13,Var189_mix', 'Var21,Var126_mix', 'Var21,Var133_mix', 'Var6,Var21,Var153_mix', 'Var6,Var24,Var28_mix']
----------
Var6,Var24,Var38_mix diff_metric: 0.0
----------
----------
Var6,Var24,Var57_mix diff_metric: 0.0
----------
----------
Var6,Var24,Var73_mix diff_metric: 0.0
----------
----------
Var6,Var24,Var74_mix diff_metric: 0.0
----------
----------
Var6,Var24,Var76_mix diff_metric: -0.0001292527507295027
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var189', 'Var6,Var126_mix', 'Var13,Var28_mix', 'Var13,Var38_mix', 'Var13,Var109_mix', 'Var13,Var126_mix', 'Var13,Var189_mix', 'Var21,Var126_mix', 'Var21,Var133_mix', 'Var6,Var21,Var153_mix', 'Var6,Var24,Var28_mix', 'Var6,Var24,Var76_mix']
----------
Var6,Var24,Var81_mix diff_metric: 0.0
----------
----------
Var6,Var24,Var83_mix diff_metric: 0.0
----------
----------
Var6,Var24,Var85_mix diff_metric: 0.0
----------
----------
Var6,Var24,Var94_mix diff_metric: -5.242701277663464e-06
----------
----------
Var6,Var24,Var109_mix diff_metric: 0.0
----------
----------
Var6,Var24,Var112_mix diff_metric: 0.0
----------
----------
Var6,Var24,Var113_mix diff_metric: 0.0
----------
----------
Var6,Var24,Var119_mix diff_metric: 0.0
----------
----------
Var6,Var24,Var123_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22_mix diff_metric: 0.00013903240503565506
----------
----------
Var6,Var13,Var21,Var24_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var38_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var57_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var73_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var74_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var76_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var81_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var83_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var85_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var94_mix diff_metric: 0.0001220944470616825
----------
----------
Var6,Var13,Var21,Var109_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var112_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var113_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var119_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var123_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var125_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var126_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var133_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var134_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var140_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var149_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var153_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var160_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var163_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var189_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var24_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var25_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var28_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var38_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var57_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var73_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var74_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var76_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var81_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var83_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var85_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var94_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var109_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var112_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var113_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var119_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var123_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var125_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var126_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var133_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var134_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var140_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var149_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var153_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var160_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var163_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var189_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var25_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var28_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var38_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var57_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var73_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var74_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var76_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var81_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var83_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var85_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var94_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var109_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var112_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var113_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var119_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var123_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var125_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var126_mix diff_metric: -0.001662541232081205
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var189', 'Var6,Var126_mix', 'Var13,Var28_mix', 'Var13,Var38_mix', 'Var13,Var109_mix', 'Var13,Var126_mix', 'Var13,Var189_mix', 'Var21,Var126_mix', 'Var21,Var133_mix', 'Var6,Var21,Var153_mix', 'Var6,Var24,Var28_mix', 'Var6,Var24,Var76_mix', 'Var6,Var13,Var24,Var126_mix']
----------
Var6,Var13,Var24,Var133_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var134_mix diff_metric: 6.704608364660469e-05
----------
----------
Var6,Var13,Var24,Var140_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var149_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var153_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var160_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var163_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var189_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var25,Var28_mix diff_metric: -0.0004899405165129656
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var189', 'Var6,Var126_mix', 'Var13,Var28_mix', 'Var13,Var38_mix', 'Var13,Var109_mix', 'Var13,Var126_mix', 'Var13,Var189_mix', 'Var21,Var126_mix', 'Var21,Var133_mix', 'Var6,Var21,Var153_mix', 'Var6,Var24,Var28_mix', 'Var6,Var24,Var76_mix', 'Var6,Var13,Var24,Var126_mix', 'Var6,Var13,Var25,Var28_mix']
----------
Var6,Var13,Var25,Var38_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var25,Var57_mix diff_metric: -0.00018440193532298643
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var189', 'Var6,Var126_mix', 'Var13,Var28_mix', 'Var13,Var38_mix', 'Var13,Var109_mix', 'Var13,Var126_mix', 'Var13,Var189_mix', 'Var21,Var126_mix', 'Var21,Var133_mix', 'Var6,Var21,Var153_mix', 'Var6,Var24,Var28_mix', 'Var6,Var24,Var76_mix', 'Var6,Var13,Var24,Var126_mix', 'Var6,Var13,Var25,Var28_mix', 'Var6,Var13,Var25,Var57_mix']
----------
Var6,Var13,Var25,Var73_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var25,Var74_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var25,Var76_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var25,Var81_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var25,Var83_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var25,Var85_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var25,Var94_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var25,Var109_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var25,Var112_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var25,Var113_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var25,Var119_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var25,Var123_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var25,Var125_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var25,Var126_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var25,Var133_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var25,Var134_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var24_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var25_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var28_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var38_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var57_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var73_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var74_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var76_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var81_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var83_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var85_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var94_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var109_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var112_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var113_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var119_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var123_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var125_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var126_mix diff_metric: 5.1317979813836345e-05
----------
----------
Var6,Var13,Var21,Var22,Var133_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var134_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var140_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var149_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var153_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var160_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var163_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var189_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var25_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var28_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var38_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var57_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var73_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var74_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var76_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var81_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var83_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var85_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var94_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var109_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var112_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var113_mix diff_metric: 5.182208570575497e-05
----------
----------
Var6,Var13,Var21,Var24,Var119_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var123_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var125_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var126_mix diff_metric: -0.000333113173486721
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var189', 'Var6,Var126_mix', 'Var13,Var28_mix', 'Var13,Var38_mix', 'Var13,Var109_mix', 'Var13,Var126_mix', 'Var13,Var189_mix', 'Var21,Var126_mix', 'Var21,Var133_mix', 'Var6,Var21,Var153_mix', 'Var6,Var24,Var28_mix', 'Var6,Var24,Var76_mix', 'Var6,Var13,Var24,Var126_mix', 'Var6,Var13,Var25,Var28_mix', 'Var6,Var13,Var25,Var57_mix', 'Var6,Var13,Var21,Var24,Var126_mix']
----------
Var6,Var13,Var21,Var24,Var133_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var134_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var140_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var149_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var153_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var160_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var163_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var189_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var28_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var38_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var57_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var73_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var74_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var76_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var81_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var83_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var85_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var94_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var109_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var112_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var113_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var119_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var123_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var125_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var126_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var133_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var134_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var140_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var149_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var153_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var160_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var163_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var189_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var38_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var57_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var73_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var74_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var76_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var81_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var83_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var85_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var94_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var109_mix diff_metric: 7.97495521269731e-05
----------
----------
Var6,Var13,Var21,Var28,Var112_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var113_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var119_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var123_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var125_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var126_mix diff_metric: -0.00029278470212035224
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var189', 'Var6,Var126_mix', 'Var13,Var28_mix', 'Var13,Var38_mix', 'Var13,Var109_mix', 'Var13,Var126_mix', 'Var13,Var189_mix', 'Var21,Var126_mix', 'Var21,Var133_mix', 'Var6,Var21,Var153_mix', 'Var6,Var24,Var28_mix', 'Var6,Var24,Var76_mix', 'Var6,Var13,Var24,Var126_mix', 'Var6,Var13,Var25,Var28_mix', 'Var6,Var13,Var25,Var57_mix', 'Var6,Var13,Var21,Var24,Var126_mix', 'Var6,Var13,Var21,Var28,Var126_mix']
----------
Var6,Var13,Var21,Var28,Var133_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var134_mix diff_metric: 9.184809353690593e-05
----------
----------
Var6,Var13,Var21,Var28,Var140_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var149_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var153_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var160_mix diff_metric: 0.0
----------
----------
Var7,Var35_mix diff_metric: 0.0
----------
----------
Var7,Var44_mix diff_metric: -6.502966007837507e-05
----------
----------
Var7,Var65_mix diff_metric: -9.900639720472615e-05
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var217', 'Var7,Var65_mix']
----------
Var7,Var72_mix diff_metric: 0.00033502877587654467
----------
----------
Var7,Var78_mix diff_metric: 1.784534857951492e-05
----------
----------
Var7,Var132_mix diff_metric: -3.0246353521778957e-07
----------
----------
Var7,Var143_mix diff_metric: 0.00018480522003661015
----------
----------
Var7,Var144_mix diff_metric: -2.3390513392507195e-05
----------
----------
Var7,Var173_mix diff_metric: -8.267336630174427e-06
----------
----------
Var7,Var181_mix diff_metric: 0.0
----------
----------
Var7,Var195_mix diff_metric: 1.1090329624652284e-06
----------
----------
Var7,Var196_mix diff_metric: 4.758759621226183e-05
----------
----------
Var7,Var203_mix diff_metric: 4.032847136237194e-07
----------
----------
Var7,Var205_mix diff_metric: 4.436131850193981e-06
----------
----------
Var7,Var206_mix diff_metric: 0.0004975525154833349
----------
----------
Var7,Var207_mix diff_metric: 0.0
----------
----------
Var7,Var208_mix diff_metric: -1.078786609065574e-05
----------
----------
Var7,Var210_mix diff_metric: 0.00038039830616387604
----------
----------
Var7,Var211_mix diff_metric: -3.548905480266207e-05
----------
----------
Var7,Var218_mix diff_metric: -0.0023128378328649557
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var217', 'Var7,Var65_mix', 'Var7,Var218_mix']
----------
Var7,Var219_mix diff_metric: -0.00022140330780184847
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var217', 'Var7,Var65_mix', 'Var7,Var218_mix', 'Var7,Var219_mix']
----------
Var7,Var221_mix diff_metric: -0.00033270988877309726
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var217', 'Var7,Var65_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix']
----------
Var7,Var223_mix diff_metric: 0.00010162774784328032
----------
----------
Var7,Var225_mix diff_metric: 5.272947631151936e-05
----------
----------
Var7,Var226_mix diff_metric: 0.0006447514359707585
----------
----------
Var7,Var227_mix diff_metric: 2.5003652247113095e-05
----------
----------
Var7,Var229_mix diff_metric: 0.0001160451763568826
----------
----------
Var7,Var192_mix diff_metric: -0.0001233043012028867
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var217', 'Var7,Var65_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var192_mix']
----------
Var7,Var193_mix diff_metric: -0.0002831058689924948
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var217', 'Var7,Var65_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var192_mix', 'Var7,Var193_mix']
----------
Var7,Var197_mix diff_metric: 0.00023723223281302275
----------
----------
Var7,Var198_mix diff_metric: 7.501095674156133e-05
----------
----------
Var7,Var199_mix diff_metric: -0.0003100251236294316
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var217', 'Var7,Var65_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var199_mix']
----------
Var7,Var202_mix diff_metric: -0.0005752856440421894
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var217', 'Var7,Var65_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var199_mix', 'Var7,Var202_mix']
----------
Var7,Var204_mix diff_metric: 0.0
----------
----------
Var7,Var212_mix diff_metric: -0.0009467108653271605
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var217', 'Var7,Var65_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var199_mix', 'Var7,Var202_mix', 'Var7,Var212_mix']
----------
Var7,Var216_mix diff_metric: -0.0005828472324233003
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var217', 'Var7,Var65_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var199_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix']
----------
Var7,Var217_mix diff_metric: -0.00919055616087483
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var217', 'Var7,Var65_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var199_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix']
----------
Var7,Var220_mix diff_metric: -0.0008055612155445369
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var217', 'Var7,Var65_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var199_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix']
----------
Var7,Var222_mix diff_metric: 0.00016827054677648334
----------
----------
Var7,Var228_mix diff_metric: 7.793477091555534e-05
----------
----------
Var35,Var44_mix diff_metric: 0.0
----------
----------
Var35,Var65_mix diff_metric: 5.928285290868196e-05
----------
----------
Var35,Var72_mix diff_metric: -3.3371810055804474e-05
----------
----------
Var35,Var78_mix diff_metric: 0.00023945029873806423
----------
----------
Var35,Var132_mix diff_metric: 0.00010132528430817356
----------
----------
Var35,Var143_mix diff_metric: 2.6011864031283416e-05
----------
----------
Var35,Var144_mix diff_metric: 5.121715863520837e-05
----------
----------
Var35,Var173_mix diff_metric: 0.0
----------
----------
Var35,Var181_mix diff_metric: -0.0002477176353684607
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var217', 'Var7,Var65_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var199_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var181_mix']
----------
Var35,Var195_mix diff_metric: 1.3106753192770881e-06
----------
----------
Var35,Var196_mix diff_metric: 1.2300183766633666e-05
----------
----------
Var35,Var203_mix diff_metric: 7.0574824889702015e-06
----------
----------
Var35,Var205_mix diff_metric: -0.00017018614916630703
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var217', 'Var7,Var65_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var199_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var181_mix', 'Var35,Var205_mix']
----------
Var35,Var206_mix diff_metric: 6.472719654304626e-05
----------
----------
Var35,Var207_mix diff_metric: -6.482801772156321e-05
----------
----------
Var35,Var208_mix diff_metric: -9.063823939614579e-05
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var217', 'Var7,Var65_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var199_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var208_mix']
----------
Var35,Var210_mix diff_metric: -0.000642432548867311
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var217', 'Var7,Var65_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var199_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var208_mix', 'Var35,Var210_mix']
----------
Var35,Var211_mix diff_metric: 5.988777997900652e-05
----------
----------
Var35,Var218_mix diff_metric: -0.0004712381879168337
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var217', 'Var7,Var65_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var199_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var208_mix', 'Var35,Var210_mix', 'Var35,Var218_mix']
----------
Var35,Var219_mix diff_metric: -0.00010203103255712609
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var217', 'Var7,Var65_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var199_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var208_mix', 'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix']
----------
Var35,Var221_mix diff_metric: 9.416698064057538e-05
----------
----------
Var35,Var223_mix diff_metric: 1.5929746189802252e-05
----------
----------
Var35,Var225_mix diff_metric: -0.00017734445283401623
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var217', 'Var7,Var65_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var199_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var208_mix', 'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var225_mix']
----------
Var35,Var226_mix diff_metric: 0.0
----------
----------
Var35,Var227_mix diff_metric: -4.5772815001066114e-05
----------
----------
Var35,Var229_mix diff_metric: -0.0006927423168969771
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var217', 'Var7,Var65_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var199_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var208_mix', 'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var225_mix', 'Var35,Var229_mix']
----------
Var35,Var192_mix diff_metric: -0.0016978286445268331
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var217', 'Var7,Var65_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var199_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var208_mix', 'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var225_mix', 'Var35,Var229_mix', 'Var35,Var192_mix']
----------
Var35,Var193_mix diff_metric: -0.0003853385439063217
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var217', 'Var7,Var65_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var199_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var208_mix', 'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var225_mix', 'Var35,Var229_mix', 'Var35,Var192_mix', 'Var35,Var193_mix']
----------
Var35,Var197_mix diff_metric: 6.049270704910903e-06
----------
----------
Var35,Var198_mix diff_metric: 0.0004198193869245914
----------
----------
Var35,Var199_mix diff_metric: -0.000942980481725586
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var217', 'Var7,Var65_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var199_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var208_mix', 'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var225_mix', 'Var35,Var229_mix', 'Var35,Var192_mix', 'Var35,Var193_mix', 'Var35,Var199_mix']
----------
Var35,Var202_mix diff_metric: -0.00012118705645614014
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var217', 'Var7,Var65_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var199_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var208_mix', 'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var225_mix', 'Var35,Var229_mix', 'Var35,Var192_mix', 'Var35,Var193_mix', 'Var35,Var199_mix', 'Var35,Var202_mix']
----------
Var35,Var204_mix diff_metric: -7.309535435173764e-05
----------
----------
Var35,Var212_mix diff_metric: -6.049270705466014e-07
----------
----------
Var35,Var216_mix diff_metric: 0.00023975276227339304
----------
----------
Var35,Var217_mix diff_metric: 0.0007501095674156133
----------
----------
Var35,Var220_mix diff_metric: 0.00037686956491933543
----------
----------
Var35,Var222_mix diff_metric: -9.336041121332794e-05
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var217', 'Var7,Var65_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var199_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var208_mix', 'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var225_mix', 'Var35,Var229_mix', 'Var35,Var192_mix', 'Var35,Var193_mix', 'Var35,Var199_mix', 'Var35,Var202_mix', 'Var35,Var222_mix']
----------
Var35,Var228_mix diff_metric: -0.0008163490816350816
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var217', 'Var7,Var65_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var199_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var208_mix', 'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var225_mix', 'Var35,Var229_mix', 'Var35,Var192_mix', 'Var35,Var193_mix', 'Var35,Var199_mix', 'Var35,Var202_mix', 'Var35,Var222_mix', 'Var35,Var228_mix']
----------
Var44,Var65_mix diff_metric: -5.988777997911754e-05
----------
----------
Var44,Var72_mix diff_metric: 0.0005824439477096766
----------
----------
Var44,Var78_mix diff_metric: -0.00023340102803326435
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var217', 'Var7,Var65_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var199_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var208_mix', 'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var225_mix', 'Var35,Var229_mix', 'Var35,Var192_mix', 'Var35,Var193_mix', 'Var35,Var199_mix', 'Var35,Var202_mix', 'Var35,Var222_mix', 'Var35,Var228_mix', 'Var44,Var78_mix']
----------
Var44,Var132_mix diff_metric: 0.00033724684180169717
----------
----------
Var44,Var143_mix diff_metric: 0.0
----------
----------
Var44,Var144_mix diff_metric: 0.00016030567368163773
----------
----------
Var44,Var173_mix diff_metric: -2.822992995699103e-06
----------
----------
Var44,Var181_mix diff_metric: 0.0
----------
----------
Var44,Var195_mix diff_metric: 8.277418747937304e-05
----------
----------
Var44,Var196_mix diff_metric: 3.488412773189342e-05
----------
----------
Var44,Var203_mix diff_metric: -0.00022735175732835344
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var217', 'Var7,Var65_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var199_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var208_mix', 'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var225_mix', 'Var35,Var229_mix', 'Var35,Var192_mix', 'Var35,Var193_mix', 'Var35,Var199_mix', 'Var35,Var202_mix', 'Var35,Var222_mix', 'Var35,Var228_mix', 'Var44,Var78_mix', 'Var44,Var203_mix']
----------
Var44,Var205_mix diff_metric: 0.0006045237857827956
----------
----------
Var44,Var206_mix diff_metric: -0.0004833367293266555
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var217', 'Var7,Var65_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var199_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var208_mix', 'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var225_mix', 'Var35,Var229_mix', 'Var35,Var192_mix', 'Var35,Var193_mix', 'Var35,Var199_mix', 'Var35,Var202_mix', 'Var35,Var222_mix', 'Var35,Var228_mix', 'Var44,Var78_mix', 'Var44,Var203_mix', 'Var44,Var206_mix']
----------
Var44,Var207_mix diff_metric: 0.0001353020214344136
----------
----------
Var44,Var208_mix diff_metric: 0.0
----------
----------
Var44,Var210_mix diff_metric: -0.00045248544873144336
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var217', 'Var7,Var65_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var199_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var208_mix', 'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var225_mix', 'Var35,Var229_mix', 'Var35,Var192_mix', 'Var35,Var193_mix', 'Var35,Var199_mix', 'Var35,Var202_mix', 'Var35,Var222_mix', 'Var35,Var228_mix', 'Var44,Var78_mix', 'Var44,Var203_mix', 'Var44,Var206_mix', 'Var44,Var210_mix']
----------
Var44,Var211_mix diff_metric: 0.0
----------
----------
Var44,Var218_mix diff_metric: -0.0005220520618385294
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var217', 'Var7,Var65_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var199_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var208_mix', 'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var225_mix', 'Var35,Var229_mix', 'Var35,Var192_mix', 'Var35,Var193_mix', 'Var35,Var199_mix', 'Var35,Var202_mix', 'Var35,Var222_mix', 'Var35,Var228_mix', 'Var44,Var78_mix', 'Var44,Var203_mix', 'Var44,Var206_mix', 'Var44,Var210_mix', 'Var44,Var218_mix']
----------
Var44,Var219_mix diff_metric: -0.00010142610548669051
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var217', 'Var7,Var65_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var199_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var208_mix', 'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var225_mix', 'Var35,Var229_mix', 'Var35,Var192_mix', 'Var35,Var193_mix', 'Var35,Var199_mix', 'Var35,Var202_mix', 'Var35,Var222_mix', 'Var35,Var228_mix', 'Var44,Var78_mix', 'Var44,Var203_mix', 'Var44,Var206_mix', 'Var44,Var210_mix', 'Var44,Var218_mix', 'Var44,Var219_mix']
----------
Var44,Var221_mix diff_metric: -2.570940049617665e-05
----------
----------
Var44,Var223_mix diff_metric: -0.00014074636506888893
----------
----------
good_cat_columns: ['Var132', 'Var205', 'Var206', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var217', 'Var7,Var65_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var199_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var208_mix', 'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var225_mix', 'Var35,Var229_mix', 'Var35,Var192_mix', 'Var35,Var193_mix', 'Var35,Var199_mix', 'Var35,Var202_mix', 'Var35,Var222_mix', 'Var35,Var228_mix', 'Var44,Var78_mix', 'Var44,Var203_mix', 'Var44,Var206_mix', 'Var44,Var210_mix', 'Var44,Var218_mix', 'Var44,Var219_mix', 'Var44,Var223_mix']

Out[59]:
(['Var13',
  'Var73',
  'Var74',
  'Var81',
  'Var113',
  'Var126',
  'Var189',
  'Var6,Var126_mix',
  'Var13,Var28_mix',
  'Var13,Var38_mix',
  'Var13,Var109_mix',
  'Var13,Var126_mix',
  'Var13,Var189_mix',
  'Var21,Var126_mix',
  'Var21,Var133_mix',
  'Var6,Var21,Var153_mix',
  'Var6,Var24,Var28_mix',
  'Var6,Var24,Var76_mix',
  'Var6,Var13,Var24,Var126_mix',
  'Var6,Var13,Var25,Var28_mix',
  'Var6,Var13,Var25,Var57_mix',
  'Var6,Var13,Var21,Var24,Var126_mix',
  'Var6,Var13,Var21,Var28,Var126_mix'],
 ['Var132',
  'Var205',
  'Var206',
  'Var210',
  'Var219',
  'Var192',
  'Var199',
  'Var204',
  'Var217',
  'Var7,Var65_mix',
  'Var7,Var218_mix',
  'Var7,Var219_mix',
  'Var7,Var221_mix',
  'Var7,Var192_mix',
  'Var7,Var193_mix',
  'Var7,Var199_mix',
  'Var7,Var202_mix',
  'Var7,Var212_mix',
  'Var7,Var216_mix',
  'Var7,Var217_mix',
  'Var7,Var220_mix',
  'Var35,Var181_mix',
  'Var35,Var205_mix',
  'Var35,Var208_mix',
  'Var35,Var210_mix',
  'Var35,Var218_mix',
  'Var35,Var219_mix',
  'Var35,Var225_mix',
  'Var35,Var229_mix',
  'Var35,Var192_mix',
  'Var35,Var193_mix',
  'Var35,Var199_mix',
  'Var35,Var202_mix',
  'Var35,Var222_mix',
  'Var35,Var228_mix',
  'Var44,Var78_mix',
  'Var44,Var203_mix',
  'Var44,Var206_mix',
  'Var44,Var210_mix',
  'Var44,Var218_mix',
  'Var44,Var219_mix',
  'Var44,Var223_mix'],
 {'Var109': 0.0,
  'Var112': 0.0,
  'Var113': -0.0038471345260009393,
  'Var119': 0.0,
  'Var123': 0.0,
  'Var125': 0.0,
  'Var126': -0.03469710444600227,
  'Var13': -0.0004615593547887542,
  'Var13,Var109_mix': -0.00015536543593919205,
  'Var13,Var112_mix': 0.0,
  'Var13,Var113_mix': 0.0,
  'Var13,Var119_mix': 0.0,
  'Var13,Var123_mix': 4.032847136237194e-07,
  'Var13,Var125_mix': 0.0,
  'Var13,Var126_mix': -0.012151270886241461,
  'Var13,Var133_mix': 0.0,
  'Var13,Var134_mix': 0.0,
  'Var13,Var140_mix': 0.0,
  'Var13,Var149_mix': 0.0,
  'Var13,Var153_mix': 0.0,
  'Var13,Var160_mix': 0.0,
  'Var13,Var163_mix': 0.0,
  'Var13,Var189_mix': -0.00032595486981923383,
  'Var13,Var21_mix': 0.0,
  'Var13,Var22_mix': 0.0,
  'Var13,Var24_mix': 0.0,
  'Var13,Var25_mix': 0.0,
  'Var13,Var28_mix': -0.00012592565184177396,
  'Var13,Var38_mix': -8.075776391136547e-05,
  'Var13,Var57_mix': 0.0,
  'Var13,Var73_mix': 0.0,
  'Var13,Var74_mix': 0.0,
  'Var13,Var76_mix': 0.0,
  'Var13,Var81_mix': 0.0,
  'Var13,Var83_mix': 0.0,
  'Var13,Var85_mix': 0.0,
  'Var13,Var94_mix': 0.0,
  'Var132': -9.588094067369823e-05,
  'Var133': 1.0787866090433695e-05,
  'Var134': 0.0,
  'Var140': 0.0,
  'Var143': 0.0,
  'Var144': 0.0,
  'Var149': 0.0,
  'Var153': 0.0,
  'Var160': 0.0,
  'Var163': 0.00019085449074152105,
  'Var173': 0.0,
  'Var181': 0.0,
  'Var189': -0.004330269612970894,
  'Var192': -0.0033655117567071846,
  'Var193': 0.0007001022629211651,
  'Var195': 0.0,
  'Var196': 0.0,
  'Var197': 0.00043705980843367076,
  'Var198': 0.00033875915947800816,
  'Var199': -0.004948605900196523,
  'Var202': 7.188550021053342e-05,
  'Var203': 0.0,
  'Var204': -8.468978986964082e-05,
  'Var205': -0.0017716297471275233,
  'Var206': -0.00032655979688978043,
  'Var207': 7.561588381110873e-06,
  'Var208': 0.0,
  'Var21': 0.0,
  'Var21,Var109_mix': 0.0,
  'Var21,Var112_mix': 0.0,
  'Var21,Var113_mix': 0.0,
  'Var21,Var119_mix': 0.0,
  'Var21,Var123_mix': 0.00012784125423159765,
  'Var21,Var125_mix': 0.0,
  'Var21,Var126_mix': -0.0014468847314492717,
  'Var21,Var133_mix': -0.00022039509601767815,
  'Var21,Var134_mix': 0.0,
  'Var21,Var140_mix': 1.4014143799756518e-05,
  'Var21,Var149_mix': 0.0,
  'Var21,Var153_mix': 0.0,
  'Var21,Var160_mix': 0.0003712235789279372,
  'Var21,Var163_mix': 0.0,
  'Var21,Var189_mix': 0.0,
  'Var21,Var22_mix': 0.0,
  'Var21,Var24_mix': 0.0,
  'Var21,Var25_mix': 0.0,
  'Var21,Var28_mix': 0.0,
  'Var21,Var38_mix': 0.0,
  'Var21,Var57_mix': 0.0,
  'Var21,Var73_mix': -2.1474911002794528e-05,
  'Var21,Var74_mix': 0.0,
  'Var21,Var76_mix': 0.0,
  'Var21,Var81_mix': 0.0,
  'Var21,Var83_mix': 0.0,
  'Var21,Var85_mix': 0.0,
  'Var21,Var94_mix': 0.0,
  'Var210': -0.0012698427421504732,
  'Var211': 0.0,
  'Var212': 0.0002984306881115284,
  'Var216': 7.380110260046813e-05,
  'Var217': -0.0005289079019707987,
  'Var218': 0.0,
  'Var219': -0.00032242612857480424,
  'Var22': 0.0,
  'Var22,Var109_mix': 0.0,
  'Var22,Var24_mix': 0.0,
  'Var22,Var25_mix': 0.0,
  'Var22,Var28_mix': 0.0,
  'Var22,Var38_mix': 0.0,
  'Var22,Var57_mix': 0.0,
  'Var22,Var73_mix': 0.00026374820273644684,
  'Var22,Var74_mix': 0.0,
  'Var22,Var76_mix': 0.0,
  'Var22,Var81_mix': 0.0,
  'Var22,Var83_mix': 0.0,
  'Var22,Var85_mix': 0.0,
  'Var22,Var94_mix': 0.0,
  'Var220': -3.3573452412616334e-05,
  'Var221': 0.0,
  'Var222': 0.00011554107046485296,
  'Var223': 7.057482489081224e-06,
  'Var225': 0.0,
  'Var226': 9.779654306352192e-05,
  'Var227': 0.0,
  'Var228': -5.2729476311630386e-05,
  'Var229': 0.0,
  'Var24': 0.0,
  'Var25': 0.0,
  'Var28': 0.0,
  'Var35': 0.0,
  'Var35,Var132_mix': 0.00010132528430817356,
  'Var35,Var143_mix': 2.6011864031283416e-05,
  'Var35,Var144_mix': 5.121715863520837e-05,
  'Var35,Var173_mix': 0.0,
  'Var35,Var181_mix': -0.0002477176353684607,
  'Var35,Var192_mix': -0.0016978286445268331,
  'Var35,Var193_mix': -0.0003853385439063217,
  'Var35,Var195_mix': 1.3106753192770881e-06,
  'Var35,Var196_mix': 1.2300183766633666e-05,
  'Var35,Var197_mix': 6.049270704910903e-06,
  'Var35,Var198_mix': 0.0004198193869245914,
  'Var35,Var199_mix': -0.000942980481725586,
  'Var35,Var202_mix': -0.00012118705645614014,
  'Var35,Var203_mix': 7.0574824889702015e-06,
  'Var35,Var204_mix': -7.309535435173764e-05,
  'Var35,Var205_mix': -0.00017018614916630703,
  'Var35,Var206_mix': 6.472719654304626e-05,
  'Var35,Var207_mix': -6.482801772156321e-05,
  'Var35,Var208_mix': -9.063823939614579e-05,
  'Var35,Var210_mix': -0.000642432548867311,
  'Var35,Var211_mix': 5.988777997900652e-05,
  'Var35,Var212_mix': -6.049270705466014e-07,
  'Var35,Var216_mix': 0.00023975276227339304,
  'Var35,Var217_mix': 0.0007501095674156133,
  'Var35,Var218_mix': -0.0004712381879168337,
  'Var35,Var219_mix': -0.00010203103255712609,
  'Var35,Var220_mix': 0.00037686956491933543,
  'Var35,Var221_mix': 9.416698064057538e-05,
  'Var35,Var222_mix': -9.336041121332794e-05,
  'Var35,Var223_mix': 1.5929746189802252e-05,
  'Var35,Var225_mix': -0.00017734445283401623,
  'Var35,Var226_mix': 0.0,
  'Var35,Var227_mix': -4.5772815001066114e-05,
  'Var35,Var228_mix': -0.0008163490816350816,
  'Var35,Var229_mix': -0.0006927423168969771,
  'Var35,Var44_mix': 0.0,
  'Var35,Var65_mix': 5.928285290868196e-05,
  'Var35,Var72_mix': -3.3371810055804474e-05,
  'Var35,Var78_mix': 0.00023945029873806423,
  'Var38': 0.0,
  'Var44': 0.0,
  'Var44,Var132_mix': 0.00033724684180169717,
  'Var44,Var143_mix': 0.0,
  'Var44,Var144_mix': 0.00016030567368163773,
  'Var44,Var173_mix': -2.822992995699103e-06,
  'Var44,Var181_mix': 0.0,
  'Var44,Var195_mix': 8.277418747937304e-05,
  'Var44,Var196_mix': 3.488412773189342e-05,
  'Var44,Var203_mix': -0.00022735175732835344,
  'Var44,Var205_mix': 0.0006045237857827956,
  'Var44,Var206_mix': -0.0004833367293266555,
  'Var44,Var207_mix': 0.0001353020214344136,
  'Var44,Var208_mix': 0.0,
  'Var44,Var210_mix': -0.00045248544873144336,
  'Var44,Var211_mix': 0.0,
  'Var44,Var218_mix': -0.0005220520618385294,
  'Var44,Var219_mix': -0.00010142610548669051,
  'Var44,Var221_mix': -2.570940049617665e-05,
  'Var44,Var223_mix': -0.00014074636506888893,
  'Var44,Var65_mix': -5.988777997911754e-05,
  'Var44,Var72_mix': 0.0005824439477096766,
  'Var44,Var78_mix': -0.00023340102803326435,
  'Var57': 0.0,
  'Var6': 0.0,
  'Var6,Var109_mix': 0.0,
  'Var6,Var112_mix': 0.0,
  'Var6,Var113_mix': 4.950319860219654e-05,
  'Var6,Var119_mix': 0.0,
  'Var6,Var123_mix': 0.0,
  'Var6,Var125_mix': 0.0,
  'Var6,Var126_mix': -0.00010314006551959132,
  'Var6,Var13,Var109_mix': 0.0,
  'Var6,Var13,Var112_mix': 0.0,
  'Var6,Var13,Var113_mix': 0.0,
  'Var6,Var13,Var119_mix': 0.0,
  'Var6,Var13,Var123_mix': 0.0,
  'Var6,Var13,Var125_mix': 0.0,
  'Var6,Var13,Var126_mix': 0.0,
  'Var6,Var13,Var133_mix': 0.0,
  'Var6,Var13,Var134_mix': 0.0,
  'Var6,Var13,Var140_mix': 0.0,
  'Var6,Var13,Var149_mix': 0.0,
  'Var6,Var13,Var153_mix': 0.0,
  'Var6,Var13,Var160_mix': 0.0,
  'Var6,Var13,Var163_mix': 0.0,
  'Var6,Var13,Var189_mix': 0.0005369735962439393,
  'Var6,Var13,Var21,Var109_mix': 0.0,
  'Var6,Var13,Var21,Var112_mix': 0.0,
  'Var6,Var13,Var21,Var113_mix': 0.0,
  'Var6,Var13,Var21,Var119_mix': 0.0,
  'Var6,Var13,Var21,Var123_mix': 0.0,
  'Var6,Var13,Var21,Var125_mix': 0.0,
  'Var6,Var13,Var21,Var126_mix': 0.0,
  'Var6,Var13,Var21,Var133_mix': 0.0,
  'Var6,Var13,Var21,Var134_mix': 0.0,
  'Var6,Var13,Var21,Var140_mix': 0.0,
  'Var6,Var13,Var21,Var149_mix': 0.0,
  'Var6,Var13,Var21,Var153_mix': 0.0,
  'Var6,Var13,Var21,Var160_mix': 0.0,
  'Var6,Var13,Var21,Var163_mix': 0.0,
  'Var6,Var13,Var21,Var189_mix': 0.0,
  'Var6,Var13,Var21,Var22,Var109_mix': 0.0,
  'Var6,Var13,Var21,Var22,Var112_mix': 0.0,
  'Var6,Var13,Var21,Var22,Var113_mix': 0.0,
  'Var6,Var13,Var21,Var22,Var119_mix': 0.0,
  'Var6,Var13,Var21,Var22,Var123_mix': 0.0,
  'Var6,Var13,Var21,Var22,Var125_mix': 0.0,
  'Var6,Var13,Var21,Var22,Var126_mix': 5.1317979813836345e-05,
  'Var6,Var13,Var21,Var22,Var133_mix': 0.0,
  'Var6,Var13,Var21,Var22,Var134_mix': 0.0,
  'Var6,Var13,Var21,Var22,Var140_mix': 0.0,
  'Var6,Var13,Var21,Var22,Var149_mix': 0.0,
  'Var6,Var13,Var21,Var22,Var153_mix': 0.0,
  'Var6,Var13,Var21,Var22,Var160_mix': 0.0,
  'Var6,Var13,Var21,Var22,Var163_mix': 0.0,
  'Var6,Var13,Var21,Var22,Var189_mix': 0.0,
  'Var6,Var13,Var21,Var22,Var24_mix': 0.0,
  'Var6,Var13,Var21,Var22,Var25_mix': 0.0,
  'Var6,Var13,Var21,Var22,Var28_mix': 0.0,
  'Var6,Var13,Var21,Var22,Var38_mix': 0.0,
  'Var6,Var13,Var21,Var22,Var57_mix': 0.0,
  'Var6,Var13,Var21,Var22,Var73_mix': 0.0,
  'Var6,Var13,Var21,Var22,Var74_mix': 0.0,
  'Var6,Var13,Var21,Var22,Var76_mix': 0.0,
  'Var6,Var13,Var21,Var22,Var81_mix': 0.0,
  'Var6,Var13,Var21,Var22,Var83_mix': 0.0,
  'Var6,Var13,Var21,Var22,Var85_mix': 0.0,
  'Var6,Var13,Var21,Var22,Var94_mix': 0.0,
  'Var6,Var13,Var21,Var22_mix': 0.00013903240503565506,
  'Var6,Var13,Var21,Var24,Var109_mix': 0.0,
  'Var6,Var13,Var21,Var24,Var112_mix': 0.0,
  'Var6,Var13,Var21,Var24,Var113_mix': 5.182208570575497e-05,
  'Var6,Var13,Var21,Var24,Var119_mix': 0.0,
  'Var6,Var13,Var21,Var24,Var123_mix': 0.0,
  'Var6,Var13,Var21,Var24,Var125_mix': 0.0,
  'Var6,Var13,Var21,Var24,Var126_mix': -0.000333113173486721,
  'Var6,Var13,Var21,Var24,Var133_mix': 0.0,
  'Var6,Var13,Var21,Var24,Var134_mix': 0.0,
  'Var6,Var13,Var21,Var24,Var140_mix': 0.0,
  'Var6,Var13,Var21,Var24,Var149_mix': 0.0,
  'Var6,Var13,Var21,Var24,Var153_mix': 0.0,
  'Var6,Var13,Var21,Var24,Var160_mix': 0.0,
  'Var6,Var13,Var21,Var24,Var163_mix': 0.0,
  'Var6,Var13,Var21,Var24,Var189_mix': 0.0,
  'Var6,Var13,Var21,Var24,Var25_mix': 0.0,
  'Var6,Var13,Var21,Var24,Var28_mix': 0.0,
  'Var6,Var13,Var21,Var24,Var38_mix': 0.0,
  'Var6,Var13,Var21,Var24,Var57_mix': 0.0,
  'Var6,Var13,Var21,Var24,Var73_mix': 0.0,
  'Var6,Var13,Var21,Var24,Var74_mix': 0.0,
  'Var6,Var13,Var21,Var24,Var76_mix': 0.0,
  'Var6,Var13,Var21,Var24,Var81_mix': 0.0,
  'Var6,Var13,Var21,Var24,Var83_mix': 0.0,
  'Var6,Var13,Var21,Var24,Var85_mix': 0.0,
  'Var6,Var13,Var21,Var24,Var94_mix': 0.0,
  'Var6,Var13,Var21,Var24_mix': 0.0,
  'Var6,Var13,Var21,Var25,Var109_mix': 0.0,
  'Var6,Var13,Var21,Var25,Var112_mix': 0.0,
  'Var6,Var13,Var21,Var25,Var113_mix': 0.0,
  'Var6,Var13,Var21,Var25,Var119_mix': 0.0,
  'Var6,Var13,Var21,Var25,Var123_mix': 0.0,
  'Var6,Var13,Var21,Var25,Var125_mix': 0.0,
  'Var6,Var13,Var21,Var25,Var126_mix': 0.0,
  'Var6,Var13,Var21,Var25,Var133_mix': 0.0,
  'Var6,Var13,Var21,Var25,Var134_mix': 0.0,
  'Var6,Var13,Var21,Var25,Var140_mix': 0.0,
  'Var6,Var13,Var21,Var25,Var149_mix': 0.0,
  'Var6,Var13,Var21,Var25,Var153_mix': 0.0,
  'Var6,Var13,Var21,Var25,Var160_mix': 0.0,
  'Var6,Var13,Var21,Var25,Var163_mix': 0.0,
  'Var6,Var13,Var21,Var25,Var189_mix': 0.0,
  'Var6,Var13,Var21,Var25,Var28_mix': 0.0,
  'Var6,Var13,Var21,Var25,Var38_mix': 0.0,
  'Var6,Var13,Var21,Var25,Var57_mix': 0.0,
  'Var6,Var13,Var21,Var25,Var73_mix': 0.0,
  'Var6,Var13,Var21,Var25,Var74_mix': 0.0,
  'Var6,Var13,Var21,Var25,Var76_mix': 0.0,
  'Var6,Var13,Var21,Var25,Var81_mix': 0.0,
  'Var6,Var13,Var21,Var25,Var83_mix': 0.0,
  'Var6,Var13,Var21,Var25,Var85_mix': 0.0,
  'Var6,Var13,Var21,Var25,Var94_mix': 0.0,
  'Var6,Var13,Var21,Var25_mix': 0.0,
  'Var6,Var13,Var21,Var28,Var109_mix': 7.97495521269731e-05,
  'Var6,Var13,Var21,Var28,Var112_mix': 0.0,
  'Var6,Var13,Var21,Var28,Var113_mix': 0.0,
  'Var6,Var13,Var21,Var28,Var119_mix': 0.0,
  'Var6,Var13,Var21,Var28,Var123_mix': 0.0,
  'Var6,Var13,Var21,Var28,Var125_mix': 0.0,
  'Var6,Var13,Var21,Var28,Var126_mix': -0.00029278470212035224,
  'Var6,Var13,Var21,Var28,Var133_mix': 0.0,
  'Var6,Var13,Var21,Var28,Var134_mix': 9.184809353690593e-05,
  'Var6,Var13,Var21,Var28,Var140_mix': 0.0,
  'Var6,Var13,Var21,Var28,Var149_mix': 0.0,
  'Var6,Var13,Var21,Var28,Var153_mix': 0.0,
  'Var6,Var13,Var21,Var28,Var160_mix': 0.0,
  'Var6,Var13,Var21,Var28,Var38_mix': 0.0,
  'Var6,Var13,Var21,Var28,Var57_mix': 0.0,
  'Var6,Var13,Var21,Var28,Var73_mix': 0.0,
  'Var6,Var13,Var21,Var28,Var74_mix': 0.0,
  'Var6,Var13,Var21,Var28,Var76_mix': 0.0,
  'Var6,Var13,Var21,Var28,Var81_mix': 0.0,
  'Var6,Var13,Var21,Var28,Var83_mix': 0.0,
  'Var6,Var13,Var21,Var28,Var85_mix': 0.0,
  'Var6,Var13,Var21,Var28,Var94_mix': 0.0,
  'Var6,Var13,Var21,Var28_mix': 0.0,
  'Var6,Var13,Var21,Var38_mix': 0.0,
  'Var6,Var13,Var21,Var57_mix': 0.0,
  'Var6,Var13,Var21,Var73_mix': 0.0,
  'Var6,Var13,Var21,Var74_mix': 0.0,
  'Var6,Var13,Var21,Var76_mix': 0.0,
  'Var6,Var13,Var21,Var81_mix': 0.0,
  'Var6,Var13,Var21,Var83_mix': 0.0,
  'Var6,Var13,Var21,Var85_mix': 0.0,
  'Var6,Var13,Var21,Var94_mix': 0.0001220944470616825,
  'Var6,Var13,Var21_mix': 0.0,
  'Var6,Var13,Var22,Var109_mix': 0.0,
  'Var6,Var13,Var22,Var112_mix': 0.0,
  'Var6,Var13,Var22,Var113_mix': 0.0,
  'Var6,Var13,Var22,Var119_mix': 0.0,
  'Var6,Var13,Var22,Var123_mix': 0.0,
  'Var6,Var13,Var22,Var125_mix': 0.0,
  'Var6,Var13,Var22,Var126_mix': 0.0,
  'Var6,Var13,Var22,Var133_mix': 0.0,
  'Var6,Var13,Var22,Var134_mix': 0.0,
  'Var6,Var13,Var22,Var140_mix': 0.0,
  'Var6,Var13,Var22,Var149_mix': 0.0,
  'Var6,Var13,Var22,Var153_mix': 0.0,
  'Var6,Var13,Var22,Var160_mix': 0.0,
  'Var6,Var13,Var22,Var163_mix': 0.0,
  'Var6,Var13,Var22,Var189_mix': 0.0,
  'Var6,Var13,Var22,Var24_mix': 0.0,
  'Var6,Var13,Var22,Var25_mix': 0.0,
  'Var6,Var13,Var22,Var28_mix': 0.0,
  'Var6,Var13,Var22,Var38_mix': 0.0,
  'Var6,Var13,Var22,Var57_mix': 0.0,
  'Var6,Var13,Var22,Var73_mix': 0.0,
  'Var6,Var13,Var22,Var74_mix': 0.0,
  'Var6,Var13,Var22,Var76_mix': 0.0,
  'Var6,Var13,Var22,Var81_mix': 0.0,
  'Var6,Var13,Var22,Var83_mix': 0.0,
  'Var6,Var13,Var22,Var85_mix': 0.0,
  'Var6,Var13,Var22,Var94_mix': 0.0,
  'Var6,Var13,Var22_mix': 0.0,
  'Var6,Var13,Var24,Var109_mix': 0.0,
  'Var6,Var13,Var24,Var112_mix': 0.0,
  'Var6,Var13,Var24,Var113_mix': 0.0,
  'Var6,Var13,Var24,Var119_mix': 0.0,
  'Var6,Var13,Var24,Var123_mix': 0.0,
  'Var6,Var13,Var24,Var125_mix': 0.0,
  'Var6,Var13,Var24,Var126_mix': -0.001662541232081205,
  'Var6,Var13,Var24,Var133_mix': 0.0,
  'Var6,Var13,Var24,Var134_mix': 6.704608364660469e-05,
  'Var6,Var13,Var24,Var140_mix': 0.0,
  'Var6,Var13,Var24,Var149_mix': 0.0,
  'Var6,Var13,Var24,Var153_mix': 0.0,
  'Var6,Var13,Var24,Var160_mix': 0.0,
  'Var6,Var13,Var24,Var163_mix': 0.0,
  'Var6,Var13,Var24,Var189_mix': 0.0,
  'Var6,Var13,Var24,Var25_mix': 0.0,
  'Var6,Var13,Var24,Var28_mix': 0.0,
  'Var6,Var13,Var24,Var38_mix': 0.0,
  'Var6,Var13,Var24,Var57_mix': 0.0,
  'Var6,Var13,Var24,Var73_mix': 0.0,
  'Var6,Var13,Var24,Var74_mix': 0.0,
  'Var6,Var13,Var24,Var76_mix': 0.0,
  'Var6,Var13,Var24,Var81_mix': 0.0,
  'Var6,Var13,Var24,Var83_mix': 0.0,
  'Var6,Var13,Var24,Var85_mix': 0.0,
  'Var6,Var13,Var24,Var94_mix': 0.0,
  'Var6,Var13,Var24_mix': 0.0,
  'Var6,Var13,Var25,Var109_mix': 0.0,
  'Var6,Var13,Var25,Var112_mix': 0.0,
  'Var6,Var13,Var25,Var113_mix': 0.0,
  'Var6,Var13,Var25,Var119_mix': 0.0,
  'Var6,Var13,Var25,Var123_mix': 0.0,
  'Var6,Var13,Var25,Var125_mix': 0.0,
  'Var6,Var13,Var25,Var126_mix': 0.0,
  'Var6,Var13,Var25,Var133_mix': 0.0,
  'Var6,Var13,Var25,Var134_mix': 0.0,
  'Var6,Var13,Var25,Var28_mix': -0.0004899405165129656,
  'Var6,Var13,Var25,Var38_mix': 0.0,
  'Var6,Var13,Var25,Var57_mix': -0.00018440193532298643,
  'Var6,Var13,Var25,Var73_mix': 0.0,
  'Var6,Var13,Var25,Var74_mix': 0.0,
  'Var6,Var13,Var25,Var76_mix': 0.0,
  'Var6,Var13,Var25,Var81_mix': 0.0,
  'Var6,Var13,Var25,Var83_mix': 0.0,
  'Var6,Var13,Var25,Var85_mix': 0.0,
  'Var6,Var13,Var25,Var94_mix': 0.0,
  'Var6,Var13,Var25_mix': 0.0,
  'Var6,Var13,Var28_mix': 0.0,
  'Var6,Var13,Var38_mix': 0.00012098541409921726,
  'Var6,Var13,Var57_mix': 0.0,
  'Var6,Var13,Var73_mix': 0.0004999722237652993,
  'Var6,Var13,Var74_mix': 0.0,
  'Var6,Var13,Var76_mix': 0.0,
  'Var6,Var13,Var81_mix': 0.0,
  'Var6,Var13,Var83_mix': 0.0,
  'Var6,Var13,Var85_mix': 0.0,
  'Var6,Var13,Var94_mix': 0.0,
  'Var6,Var133_mix': 0.0,
  'Var6,Var134_mix': 0.0,
  'Var6,Var13_mix': 0.0,
  'Var6,Var140_mix': 0.0,
  'Var6,Var149_mix': 0.0,
  'Var6,Var153_mix': 0.0,
  'Var6,Var160_mix': 0.0,
  'Var6,Var163_mix': 0.0,
  'Var6,Var189_mix': 0.0,
  'Var6,Var21,Var109_mix': 0.0,
  'Var6,Var21,Var112_mix': 0.0,
  'Var6,Var21,Var113_mix': 0.0,
  'Var6,Var21,Var119_mix': 0.0,
  'Var6,Var21,Var123_mix': 0.0,
  'Var6,Var21,Var125_mix': 0.0,
  'Var6,Var21,Var126_mix': -6.150091883427855e-06,
  'Var6,Var21,Var133_mix': 0.0,
  'Var6,Var21,Var134_mix': 0.0,
  'Var6,Var21,Var140_mix': 0.0,
  'Var6,Var21,Var149_mix': 0.0,
  'Var6,Var21,Var153_mix': -0.00014356935806469906,
  'Var6,Var21,Var160_mix': 0.0,
  'Var6,Var21,Var163_mix': 0.0,
  'Var6,Var21,Var189_mix': 0.0,
  'Var6,Var21,Var22_mix': 0.0,
  'Var6,Var21,Var24_mix': 0.0,
  'Var6,Var21,Var25_mix': 0.0,
  'Var6,Var21,Var28_mix': 0.0,
  'Var6,Var21,Var38_mix': 0.0,
  'Var6,Var21,Var57_mix': 0.0,
  'Var6,Var21,Var73_mix': 0.00010314006551959132,
  'Var6,Var21,Var74_mix': 0.0,
  'Var6,Var21,Var76_mix': 0.0,
  'Var6,Var21,Var81_mix': 0.0,
  'Var6,Var21,Var83_mix': 0.0,
  'Var6,Var21,Var85_mix': 0.0,
  'Var6,Var21,Var94_mix': 0.0,
  'Var6,Var21_mix': 0.0,
  'Var6,Var22,Var109_mix': 0.0,
  'Var6,Var22,Var112_mix': 0.0,
  'Var6,Var22,Var113_mix': 0.0,
  'Var6,Var22,Var119_mix': 0.0,
  'Var6,Var22,Var123_mix': 0.0,
  'Var6,Var22,Var125_mix': 0.0,
  'Var6,Var22,Var126_mix': 0.0,
  'Var6,Var22,Var133_mix': 0.0,
  'Var6,Var22,Var134_mix': 0.0,
  'Var6,Var22,Var140_mix': 0.0,
  'Var6,Var22,Var149_mix': 0.0,
  'Var6,Var22,Var153_mix': 0.0,
  'Var6,Var22,Var160_mix': 0.0,
  'Var6,Var22,Var163_mix': 0.0,
  'Var6,Var22,Var189_mix': 0.0,
  'Var6,Var22,Var24_mix': 0.0,
  'Var6,Var22,Var25_mix': 0.0,
  'Var6,Var22,Var28_mix': 0.0,
  'Var6,Var22,Var38_mix': 0.0,
  'Var6,Var22,Var57_mix': 0.0,
  'Var6,Var22,Var73_mix': 0.0,
  'Var6,Var22,Var74_mix': 0.0,
  'Var6,Var22,Var76_mix': 0.0,
  'Var6,Var22,Var81_mix': 0.0,
  'Var6,Var22,Var83_mix': 0.0,
  'Var6,Var22,Var85_mix': 0.0,
  'Var6,Var22,Var94_mix': 0.0,
  'Var6,Var22_mix': 0.0,
  'Var6,Var24,Var109_mix': 0.0,
  'Var6,Var24,Var112_mix': 0.0,
  'Var6,Var24,Var113_mix': 0.0,
  'Var6,Var24,Var119_mix': 0.0,
  'Var6,Var24,Var123_mix': 0.0,
  'Var6,Var24,Var25_mix': 0.0,
  'Var6,Var24,Var28_mix': -0.000201339893296959,
  'Var6,Var24,Var38_mix': 0.0,
  'Var6,Var24,Var57_mix': 0.0,
  'Var6,Var24,Var73_mix': 0.0,
  'Var6,Var24,Var74_mix': 0.0,
  'Var6,Var24,Var76_mix': -0.0001292527507295027,
  'Var6,Var24,Var81_mix': 0.0,
  'Var6,Var24,Var83_mix': 0.0,
  'Var6,Var24,Var85_mix': 0.0,
  'Var6,Var24,Var94_mix': -5.242701277663464e-06,
  'Var6,Var24_mix': 0.0,
  'Var6,Var25_mix': 0.0,
  'Var6,Var28_mix': 0.0,
  'Var6,Var38_mix': 0.0,
  'Var6,Var57_mix': 0.0,
  'Var6,Var73_mix': 0.00019317337784507949,
  'Var6,Var74_mix': 0.0,
  'Var6,Var76_mix': 0.0,
  'Var6,Var81_mix': 1.64338520818319e-05,
  'Var6,Var83_mix': 1.9055202720608122e-05,
  'Var6,Var85_mix': 0.0,
  'Var6,Var94_mix': 0.0,
  'Var65': -1.1896899053120968e-05,
  'Var7': 5.010812567263212e-05,
  'Var7,Var132_mix': -3.0246353521778957e-07,
  'Var7,Var143_mix': 0.00018480522003661015,
  'Var7,Var144_mix': -2.3390513392507195e-05,
  'Var7,Var173_mix': -8.267336630174427e-06,
  'Var7,Var181_mix': 0.0,
  'Var7,Var192_mix': -0.0001233043012028867,
  'Var7,Var193_mix': -0.0002831058689924948,
  'Var7,Var195_mix': 1.1090329624652284e-06,
  'Var7,Var196_mix': 4.758759621226183e-05,
  'Var7,Var197_mix': 0.00023723223281302275,
  'Var7,Var198_mix': 7.501095674156133e-05,
  'Var7,Var199_mix': -0.0003100251236294316,
  'Var7,Var202_mix': -0.0005752856440421894,
  'Var7,Var203_mix': 4.032847136237194e-07,
  'Var7,Var204_mix': 0.0,
  'Var7,Var205_mix': 4.436131850193981e-06,
  'Var7,Var206_mix': 0.0004975525154833349,
  'Var7,Var207_mix': 0.0,
  'Var7,Var208_mix': -1.078786609065574e-05,
  'Var7,Var210_mix': 0.00038039830616387604,
  'Var7,Var211_mix': -3.548905480266207e-05,
  'Var7,Var212_mix': -0.0009467108653271605,
  'Var7,Var216_mix': -0.0005828472324233003,
  'Var7,Var217_mix': -0.00919055616087483,
  'Var7,Var218_mix': -0.0023128378328649557,
  'Var7,Var219_mix': -0.00022140330780184847,
  'Var7,Var220_mix': -0.0008055612155445369,
  'Var7,Var221_mix': -0.00033270988877309726,
  'Var7,Var222_mix': 0.00016827054677648334,
  'Var7,Var223_mix': 0.00010162774784328032,
  'Var7,Var225_mix': 5.272947631151936e-05,
  'Var7,Var226_mix': 0.0006447514359707585,
  'Var7,Var227_mix': 2.5003652247113095e-05,
  'Var7,Var228_mix': 7.793477091555534e-05,
  'Var7,Var229_mix': 0.0001160451763568826,
  'Var7,Var35_mix': 0.0,
  'Var7,Var44_mix': -6.502966007837507e-05,
  'Var7,Var65_mix': -9.900639720472615e-05,
  'Var7,Var72_mix': 0.00033502877587654467,
  'Var7,Var78_mix': 1.784534857951492e-05,
  'Var72': 0.0,
  'Var73': -0.0016938966185686688,
  'Var74': -0.0018407930755208746,
  'Var76': 0.0,
  'Var78': 1.7139600330562388e-05,
  'Var81': -0.001946856955214682,
  'Var83': 0.0,
  'Var85': 0.0,
  'Var94': 0.0})
In [63]:
# Hand-picked numeric columns fed to the model. Order is preserved exactly,
# because it determines the column order of the frames built from this list.
# Names ending in "_mix" are interaction columns over the listed variables.
num_cols_selected = [
    # single variables
    'Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var189',
    # interactions of two variables
    'Var6,Var126_mix',
    'Var13,Var28_mix', 'Var13,Var38_mix', 'Var13,Var109_mix',
    'Var13,Var126_mix', 'Var13,Var189_mix',
    'Var21,Var126_mix', 'Var21,Var133_mix',
    # interactions of three variables
    'Var6,Var21,Var153_mix', 'Var6,Var24,Var28_mix', 'Var6,Var24,Var76_mix',
    # interactions of four variables
    'Var6,Var13,Var24,Var126_mix',
    'Var6,Var13,Var25,Var28_mix',
    'Var6,Var13,Var25,Var57_mix',
    # interactions of five variables
    'Var6,Var13,Var21,Var24,Var126_mix',
    'Var6,Var13,Var21,Var28,Var126_mix',
]
In [61]:
 # Hand-picked categorical columns; this list is also passed to CatBoost as
 # `cat_features`. Order is preserved exactly. Names ending in "_mix" are
 # pairwise interaction columns, grouped below by their first variable.
 cat_cols_selected = [
     # single variables
     'Var132', 'Var205', 'Var206', 'Var210', 'Var219',
     'Var192', 'Var199', 'Var204', 'Var217',
     # Var7 x <categorical>
     'Var7,Var65_mix', 'Var7,Var218_mix', 'Var7,Var219_mix',
     'Var7,Var221_mix', 'Var7,Var192_mix', 'Var7,Var193_mix',
     'Var7,Var199_mix', 'Var7,Var202_mix', 'Var7,Var212_mix',
     'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix',
     # Var35 x <categorical>
     'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var208_mix',
     'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix',
     'Var35,Var225_mix', 'Var35,Var229_mix', 'Var35,Var192_mix',
     'Var35,Var193_mix', 'Var35,Var199_mix', 'Var35,Var202_mix',
     'Var35,Var222_mix', 'Var35,Var228_mix',
     # Var44 x <categorical>
     'Var44,Var78_mix', 'Var44,Var203_mix', 'Var44,Var206_mix',
     'Var44,Var210_mix', 'Var44,Var218_mix', 'Var44,Var219_mix',
     'Var44,Var223_mix',
 ]
In [64]:
# Full selected feature set: categorical columns first, then numeric ones.
good_columns2 = [*cat_cols_selected, *num_cols_selected]
In [65]:
# Validation experiment on the selected feature subset (good_columns2):
# fit on the train split, track AUC on the validation split, then report
# threshold-free and thresholded metrics for the validation predictions.
estimator_cb = CatBoostClassifier(
    task_type='GPU',
    iterations=380,
    random_state=0,
    eval_metric='AUC',
    learning_rate=0.08,
    boosting_type='Ordered',
    bootstrap_type='Bernoulli',
    subsample=0.8,
    one_hot_max_size=10,
    leaf_estimation_iterations=10,
    max_ctr_complexity=4,
)

estimator_cb.fit(
    data_train[good_columns2],
    labels_train,
    cat_features=cat_cols_selected,
    verbose=10,
    plot=True,
    eval_set=(data_val[good_columns2], labels_val),
)

# Predicted class probabilities; column index 1 is used as the score below.
prb = estimator_cb.predict_proba(data_val[good_columns2])

print(f'ROC AUC: {roc_auc_score(labels_val.values,prb[:,1])}')
print(f'PRC AUC: {average_precision_score(labels_val.values,prb[:,1])}')

# Hard class predictions for the per-class precision/recall report.
pred = estimator_cb.predict(data_val[good_columns2])
print(classification_report(labels_val, pred))
print('------------------', '------------------', sep='\n')

# Precision/recall plot over a grid of decision thresholds.
pr_plot(
    labels_val.values,
    prb,
    [0.05, 0.1, 0.2, 0.25, 0.3, 0.35, 0.4, 0.5,
     0.6, 0.7, 0.8, 0.9, 0.95, 0.96, 0.97, 0.99],
)
0:	learn: 0.5519452	test: 0.5560269	best: 0.5560269 (0)	total: 153ms	remaining: 58.2s
10:	learn: 0.5786859	test: 0.5815250	best: 0.5821655 (6)	total: 1.5s	remaining: 50.3s
20:	learn: 0.5794672	test: 0.5814785	best: 0.5823294 (16)	total: 2.83s	remaining: 48.4s
30:	learn: 0.6601694	test: 0.6568312	best: 0.6568312 (30)	total: 4.31s	remaining: 48.6s
40:	learn: 0.7020219	test: 0.6953762	best: 0.6953762 (40)	total: 5.88s	remaining: 48.7s
50:	learn: 0.7229486	test: 0.7110304	best: 0.7110304 (50)	total: 7.56s	remaining: 48.8s
60:	learn: 0.7316695	test: 0.7179956	best: 0.7179956 (60)	total: 9.24s	remaining: 48.3s
70:	learn: 0.7378988	test: 0.7220899	best: 0.7220899 (70)	total: 10.8s	remaining: 47.1s
80:	learn: 0.7418731	test: 0.7236800	best: 0.7242329 (77)	total: 12.4s	remaining: 45.8s
90:	learn: 0.7466585	test: 0.7258348	best: 0.7258348 (90)	total: 14.1s	remaining: 44.8s
100:	learn: 0.7490828	test: 0.7273776	best: 0.7273939 (99)	total: 15.7s	remaining: 43.4s
110:	learn: 0.7507576	test: 0.7281425	best: 0.7281425 (110)	total: 17.3s	remaining: 42s
120:	learn: 0.7546104	test: 0.7310944	best: 0.7310944 (120)	total: 18.9s	remaining: 40.5s
130:	learn: 0.7550428	test: 0.7309270	best: 0.7313065 (121)	total: 20.4s	remaining: 38.8s
140:	learn: 0.7574762	test: 0.7323049	best: 0.7323099 (138)	total: 22s	remaining: 37.3s
150:	learn: 0.7589058	test: 0.7328299	best: 0.7329515 (146)	total: 23.5s	remaining: 35.6s
160:	learn: 0.7603109	test: 0.7333826	best: 0.7333826 (160)	total: 25s	remaining: 34s
170:	learn: 0.7615475	test: 0.7334864	best: 0.7334945 (169)	total: 26.5s	remaining: 32.3s
180:	learn: 0.7625361	test: 0.7333032	best: 0.7334945 (169)	total: 27.9s	remaining: 30.7s
190:	learn: 0.7631741	test: 0.7333000	best: 0.7334945 (169)	total: 29.4s	remaining: 29.1s
200:	learn: 0.7639005	test: 0.7329502	best: 0.7334945 (169)	total: 30.8s	remaining: 27.4s
210:	learn: 0.7644868	test: 0.7331801	best: 0.7334945 (169)	total: 32.3s	remaining: 25.8s
220:	learn: 0.7653224	test: 0.7328864	best: 0.7334945 (169)	total: 33.8s	remaining: 24.3s
230:	learn: 0.7659861	test: 0.7320665	best: 0.7334945 (169)	total: 35.3s	remaining: 22.8s
240:	learn: 0.7669547	test: 0.7326749	best: 0.7334945 (169)	total: 36.8s	remaining: 21.2s
250:	learn: 0.7678331	test: 0.7329995	best: 0.7334945 (169)	total: 38.2s	remaining: 19.7s
260:	learn: 0.7690979	test: 0.7326261	best: 0.7334945 (169)	total: 39.8s	remaining: 18.1s
270:	learn: 0.7700035	test: 0.7325933	best: 0.7334945 (169)	total: 41.3s	remaining: 16.6s
280:	learn: 0.7714778	test: 0.7330269	best: 0.7334945 (169)	total: 42.8s	remaining: 15.1s
290:	learn: 0.7720653	test: 0.7332443	best: 0.7334945 (169)	total: 44.3s	remaining: 13.6s
300:	learn: 0.7724101	test: 0.7331722	best: 0.7334945 (169)	total: 45.8s	remaining: 12s
310:	learn: 0.7729082	test: 0.7331380	best: 0.7334945 (169)	total: 47.3s	remaining: 10.5s
320:	learn: 0.7734896	test: 0.7334786	best: 0.7334945 (169)	total: 48.8s	remaining: 8.96s
330:	learn: 0.7742760	test: 0.7328913	best: 0.7334945 (169)	total: 50.3s	remaining: 7.44s
340:	learn: 0.7755870	test: 0.7325078	best: 0.7334945 (169)	total: 51.9s	remaining: 5.93s
350:	learn: 0.7762935	test: 0.7327324	best: 0.7334945 (169)	total: 53.4s	remaining: 4.41s
360:	learn: 0.7764421	test: 0.7325477	best: 0.7334945 (169)	total: 54.7s	remaining: 2.88s
370:	learn: 0.7769459	test: 0.7323079	best: 0.7334945 (169)	total: 56.2s	remaining: 1.36s
379:	learn: 0.7774857	test: 0.7320543	best: 0.7334945 (169)	total: 57.7s	remaining: 0us
bestTest = 0.7334944606
bestIteration = 169
Shrink model to first 170 iterations.
ROC AUC: 0.733494338033852
PRC AUC: 0.20539178175950468
              precision    recall  f1-score   support

          -1       0.93      1.00      0.96     11107
           1       0.62      0.01      0.02       893

    accuracy                           0.93     12000
   macro avg       0.78      0.51      0.49     12000
weighted avg       0.90      0.93      0.89     12000

------------------
------------------
In [66]:
# Feature engineering for the full labelled set (`features`) and the
# competition test set (`test_data`), both restricted to `good_columns`.
# NOTE(review): the helpers are defined elsewhere in the notebook; judging by
# the resulting column names they add "<vars>_mix" interaction columns.

# Step 1: numeric interaction features.
data_plus_new_num = normed_fe_interaction_2_3_4_5(
    features[good_columns],
    max_feats=100,
    num_columns=num_columns,
)
data_plus_new_num_test = normed_fe_interaction_2_3_4_5(
    test_data[good_columns],
    max_feats=100,
    num_columns=num_columns,
)

# Step 2: categorical interaction features. The extended categorical column
# list is taken from the train call only and reused for the test frame.
data_plus_num_cat, cat_columns_new = categ_fe_interaction(
    data_plus_new_num,
    max_feats=100,
    cat_columns=cat_columns,
)
data_plus_num_cat_test = categ_fe_interaction(
    data_plus_new_num_test,
    max_feats=100,
    cat_columns=cat_columns,
)[0]

# Step 3: categorical preprocessing with the same column list for both frames.
data = cat_prep(data_plus_num_cat, cat_columns_new)
data_test = cat_prep(data_plus_num_cat_test, cat_columns_new)

data.head()
Out[66]:
Var6 Var13 Var21 Var22 Var24 Var25 Var28 Var38 Var57 Var73 Var74 Var76 Var81 Var83 Var85 Var94 Var109 Var112 Var113 Var119 Var123 Var125 Var126 Var133 Var134 Var140 Var149 Var153 Var160 Var163 Var189 Var7 Var35 Var44 Var65 Var72 Var78 Var132 Var143 Var144 ... Var35,Var221_mix Var35,Var223_mix Var35,Var225_mix Var35,Var226_mix Var35,Var227_mix Var35,Var229_mix Var35,Var192_mix Var35,Var193_mix Var35,Var197_mix Var35,Var198_mix Var35,Var199_mix Var35,Var202_mix Var35,Var204_mix Var35,Var212_mix Var35,Var216_mix Var35,Var217_mix Var35,Var220_mix Var35,Var222_mix Var35,Var228_mix Var44,Var65_mix Var44,Var72_mix Var44,Var78_mix Var44,Var132_mix Var44,Var143_mix Var44,Var144_mix Var44,Var173_mix Var44,Var181_mix Var44,Var195_mix Var44,Var196_mix Var44,Var203_mix Var44,Var205_mix Var44,Var206_mix Var44,Var207_mix Var44,Var208_mix Var44,Var210_mix Var44,Var211_mix Var44,Var218_mix Var44,Var219_mix Var44,Var221_mix Var44,Var223_mix
0 3052.0 NaN 480.0 600.0 20.0 480.0 200.00 82752.0 2.907926 34 NaN 716008.0 14599.92 5.0 32.0 NaN 144.0 144.0 -1209960.0 1660.0 66.0 NaN 4.0 326915.0 604276.0 NaN 389396.0 2313888.0 28.0 599532.0 NaN nan 0.0 0.0 nan nan 0.0 0.0 0.0 9.0 ... 0.0Al6ZaUT 0.0LM8l689qOp 0.0nan 0.0fKCe 0.002N6s8f 0.0nan 0.0NESt0G8EIb 0.0AERks4l 0.00LaQ 0.0UaKK0yW 0.0I1sFbv_0IT 0.0EkHG 0.0k13i 0.0JBfYVit4g8 0.0TDctq2l 0.0KmRo 0.0hLKtJ9p 0.0vr93T2a 0.0xwM2aC7IdeMC0 0.0nan 0.0nan 0.00.0 0.00.0 0.00.0 0.09.0 0.00.0 0.00.0 0.0taul 0.01K8T 0.09_Y1 0.009_Q 0.0IYzP 0.0GjJ35utlTa_GNSvxxpb9ju 0.0kIsH 0.0uKAI 0.0L84s 0.0cJvF 0.0FzaX 0.0Al6ZaUT 0.0LM8l689qOp
1 1813.0 636.0 212.0 265.0 2.0 128.0 166.56 2706120.0 5.870327 128 0.0 1661128.0 67529.09 25.0 10.0 32289.0 80.0 72.0 417932.0 1025.0 66.0 24912.0 40.0 1934460.0 349568.0 205.0 735.0 6502680.0 14.0 364182.0 276.0 7.0 0.0 0.0 27.0 3.0 0.0 0.0 0.0 18.0 ... 0.0oslk 0.0LM8l689qOp 0.0ELof 0.0xb3V 0.0RAYp 0.0mj86 0.0P1WvyxLp3Z 0.02Knk1KF 0.0YFAj 0.0Bnunsla 0.0o64y9zI 0.0JDd6 0.0FbIm 0.0XfqtO3UdzaXh_ 0.0XTbqizz 0.0qMoY 0.0hN8KpA1 0.06hQ9lNX 0.055YFVY9 0.027.0 0.03.0 0.00.0 0.00.0 0.00.0 0.018.0 0.00.0 0.00.0 0.0taul 0.01K8T 0.09_Y1 0.0VpdQ 0.0haYg 0.0me75fM6ugJ 0.0kIsH 0.0uKAI 0.0L84s 0.0cJvF 0.0FzaX 0.0oslk 0.0LM8l689qOp
2 1953.0 448.0 176.0 220.0 0.0 72.0 311.76 4698780.0 5.981628 166 245.0 3025152.0 85266.00 35.0 0.0 53388.0 40.0 48.0 -124655.2 590.0 78.0 7218.0 36.0 3148410.0 1086210.0 400.0 0.0 10569040.0 18.0 0.0 NaN 7.0 0.0 0.0 18.0 3.0 0.0 0.0 0.0 27.0 ... 0.0zCkv 0.0LM8l689qOp 0.0nan 0.0FSa2 0.0ZI9m 0.0mj86 0.0FoxgUHSK8h 0.0LrdZy8QqgUfkVShG 0.0TyGl 0.0fhk21Ss 0.0nQUveAzAF7 0.0dnwD 0.0mTeA 0.04kVnq_T26xq1p 0.0pMWBUmQ 0.0qLXr 0.04UxGlow 0.0catzS2D 0.0ib5G6X1eUxUn6 0.018.0 0.03.0 0.00.0 0.00.0 0.00.0 0.027.0 0.00.0 0.00.0 0.0taul 0.01K8T 0.09_Y1 0.0VpdQ 0.0hAFG 0.07M47J5GA0pTYIFxg5uy 0.0kIsH 0.0uKAI 0.0L84s 0.0UYBR 0.0FzaX 0.0zCkv 0.0LM8l689qOp
3 1533.0 4.0 332.0 415.0 0.0 144.0 220.08 864384.0 5.108097 30 0.0 2642240.0 74107.20 10.0 2.0 NaN 32.0 32.0 378473.6 1435.0 24.0 693.0 NaN 7066700.0 650390.0 5.0 0.0 9676200.0 108.0 253284.0 NaN 7.0 5.0 0.0 9.0 nan 0.0 8.0 0.0 0.0 ... 5.0oslk 5.0LM8l689qOp 5.0nan 5.0xb3V 5.0RAYp 5.0nan 5.0vNEvyxLp3Z 5.0RO12 5.00Xwj 5.0uoZk2Zj 5.0LWyxgtXeJL 5.0CwmB 5.0vzJD 5.0NhsEn4L 5.0kZJtVhC 5.0JC0e 5.0ylCK5YS 5.0e4lqvY0 5.0F2FyR07IdsN7I 0.09.0 0.0nan 0.00.0 0.08.0 0.00.0 0.00.0 0.00.0 0.00.0 0.0taul 0.01K8T 0.0F3hy 0.0VpdQ 0.0IYzP 0.0me75fM6ugJ 0.0kIsH 0.0uKAI 0.0L84s 0.0cJvF 0.0FzaX 0.0oslk 0.0LM8l689qOp
4 686.0 0.0 160.0 200.0 2.0 48.0 278.00 4364880.0 0.650716 32 0.0 1440.0 171072.90 25.0 12.0 106455.0 32.0 8.0 142602.4 490.0 60.0 468.0 -28.0 3794460.0 642816.0 225.0 554414.0 10535200.0 24.0 2851284.0 NaN 7.0 0.0 0.0 9.0 3.0 0.0 0.0 0.0 9.0 ... 0.0oslk 0.0LM8l689qOp 0.0nan 0.0WqMG 0.0RAYp 0.0nan 0.04e7gUH7IEC 0.0RO12 0.0vSNn 0.0kugYdIL 0.0ZIXKpoNpqq 0.0625Z 0.0m_h1 0.0NhsEn4L 0.0NGZXfGp 0.0064o 0.0PYpzAu9 0.0MAz3HNj 0.0F2FyR07IdsN7I 0.09.0 0.03.0 0.00.0 0.00.0 0.00.0 0.09.0 0.00.0 0.00.0 0.0taul 0.01K8T 0.09_Y1 0.0sJzTlal 0.0zm5i 0.0me75fM6ugJ 0.0kIsH 0.0uKAI 0.0L84s 0.0cJvF 0.0FzaX 0.0oslk 0.0LM8l689qOp

5 rows × 572 columns

In [67]:
# Final model for submission: same hyper-parameters as the validation run but
# with 210 iterations, fitted on the full labelled data (no eval_set) using
# only the selected feature subset.
estimator_cb = CatBoostClassifier(
    task_type='GPU',
    iterations=210,
    random_state=0,
    eval_metric='AUC',
    learning_rate=0.08,
    boosting_type='Ordered',
    bootstrap_type='Bernoulli',
    subsample=0.8,
    one_hot_max_size=10,
    leaf_estimation_iterations=10,
    max_ctr_complexity=4,
)

estimator_cb.fit(
    data[good_columns2],
    labels,
    cat_features=cat_cols_selected,
    verbose=10,
    plot=True,
)

# Score the competition test set and store column 1 of the probabilities.
probs = estimator_cb.predict_proba(data_test[good_columns2])
write_to_submission_file(
    probs[:, 1],
    out_file='submission_cb_new_fe_final2_selected_fe.csv',
)
0:	learn: 0.5598995	total: 155ms	remaining: 32.3s
10:	learn: 0.5849296	total: 1.52s	remaining: 27.6s
20:	learn: 0.5852466	total: 2.8s	remaining: 25.2s
30:	learn: 0.6562598	total: 4.23s	remaining: 24.4s
40:	learn: 0.6972492	total: 5.78s	remaining: 23.8s
50:	learn: 0.7155094	total: 7.35s	remaining: 22.9s
60:	learn: 0.7216443	total: 8.9s	remaining: 21.7s
70:	learn: 0.7267132	total: 10.4s	remaining: 20.4s
80:	learn: 0.7314711	total: 12s	remaining: 19.1s
90:	learn: 0.7351927	total: 13.5s	remaining: 17.7s
100:	learn: 0.7400613	total: 15.1s	remaining: 16.3s
110:	learn: 0.7445844	total: 16.7s	remaining: 14.9s
120:	learn: 0.7476029	total: 18.3s	remaining: 13.4s
130:	learn: 0.7498043	total: 19.9s	remaining: 12s
140:	learn: 0.7508988	total: 21.4s	remaining: 10.5s
150:	learn: 0.7527474	total: 22.9s	remaining: 8.94s
160:	learn: 0.7544369	total: 24.4s	remaining: 7.42s
170:	learn: 0.7551470	total: 25.9s	remaining: 5.9s
180:	learn: 0.7559664	total: 27.3s	remaining: 4.38s
190:	learn: 0.7565430	total: 28.8s	remaining: 2.86s
200:	learn: 0.7577809	total: 30.3s	remaining: 1.35s
209:	learn: 0.7584895	total: 31.6s	remaining: 0us

image.png

In [ ]:
 
In [68]:
# Same feature-engineering pipeline as for the full data, applied to the
# train/validation split (`feats_train` / `feats_val`).

# Step 1: numeric interaction features.
data_plus_new_num_train = normed_fe_interaction_2_3_4_5(
    feats_train[good_columns],
    max_feats=100,
    num_columns=num_columns,
)
data_plus_new_num_val = normed_fe_interaction_2_3_4_5(
    feats_val[good_columns],
    max_feats=100,
    num_columns=num_columns,
)

# Step 2: categorical interaction features. The extended categorical column
# list comes from the train call and is reused for the validation frame.
data_plus_num_cat_train, cat_columns_new = categ_fe_interaction(
    data_plus_new_num_train,
    max_feats=100,
    cat_columns=cat_columns,
)
data_plus_num_cat_val = categ_fe_interaction(
    data_plus_new_num_val,
    max_feats=100,
    cat_columns=cat_columns,
)[0]

# Step 3: categorical preprocessing with the same column list for both frames.
data_train = cat_prep(data_plus_num_cat_train, cat_columns_new)
data_val = cat_prep(data_plus_num_cat_val, cat_columns_new)

data_train.head()
Out[68]:
Var6 Var13 Var21 Var22 Var24 Var25 Var28 Var38 Var57 Var73 Var74 Var76 Var81 Var83 Var85 Var94 Var109 Var112 Var113 Var119 Var123 Var125 Var126 Var133 Var134 Var140 Var149 Var153 Var160 Var163 Var189 Var7 Var35 Var44 Var65 Var72 Var78 Var132 Var143 Var144 ... Var35,Var221_mix Var35,Var223_mix Var35,Var225_mix Var35,Var226_mix Var35,Var227_mix Var35,Var229_mix Var35,Var192_mix Var35,Var193_mix Var35,Var197_mix Var35,Var198_mix Var35,Var199_mix Var35,Var202_mix Var35,Var204_mix Var35,Var212_mix Var35,Var216_mix Var35,Var217_mix Var35,Var220_mix Var35,Var222_mix Var35,Var228_mix Var44,Var65_mix Var44,Var72_mix Var44,Var78_mix Var44,Var132_mix Var44,Var143_mix Var44,Var144_mix Var44,Var173_mix Var44,Var181_mix Var44,Var195_mix Var44,Var196_mix Var44,Var203_mix Var44,Var205_mix Var44,Var206_mix Var44,Var207_mix Var44,Var208_mix Var44,Var210_mix Var44,Var211_mix Var44,Var218_mix Var44,Var219_mix Var44,Var221_mix Var44,Var223_mix
6892 NaN NaN NaN NaN NaN NaN NaN NaN 0.657125 8 NaN NaN NaN NaN NaN NaN NaN NaN -1395772.00 NaN NaN NaN -18.0 NaN NaN NaN NaN NaN NaN NaN NaN nan nan nan nan nan nan nan nan nan ... nanoslk nanLM8l689qOp nannan nanQu4f nanRAYp nannan nanDHeTmBftjz nanRO12 nan7gSz nan8ij6Lg8 nanLJF4fPp nanBcur nanZ5OU nanNhsEn4L nan7WwCtIM nan1GbF nan7OmVzos nan76DJixu nanF2FyR07IdsN7I nannan nannan nannan nannan nannan nannan nannan nannan nantaul nan1K8T nan9_Y1 nanVpdQ nannan nanme75fM6ugJ nankIsH nanuKAI nanMtgm nancJvF nanAU8pNoi nanoslk nanLM8l689qOp
34821 NaN NaN NaN NaN NaN NaN NaN NaN 0.117069 10 NaN NaN NaN NaN NaN NaN NaN NaN 390151.60 NaN NaN NaN 4.0 NaN NaN NaN NaN NaN NaN NaN NaN nan nan nan nan nan nan nan nan nan ... nanoslk nannan nannan nanw_Ub nanRAYp nannan nan2jigUH7ejg nanRO12 nandm89 nanLG0vbUP nann1zVHpT8NN nan5FzM nanDmlN nanNhsEn4L nanmAja5EA nanFJ56cYO nansE0uLpj nanG9maF5M nanF2FyR07IdsN7I nannan nannan nannan nannan nannan nannan nannan nannan nantaul nan1K8T nan9_Y1 nan09_Q nannan nanme75fM6ugJ nankIsH nanuKAI nanL84s nancJvF nannan nanoslk nannan
34190 98.0 0.0 152.0 190.0 0.0 64.0 86.96 4107204.0 2.271523 14 0.0 691200.0 147468.60 25.0 10.0 252417.0 24.0 8.0 -150260.80 315.0 66.0 0.0 -26.0 1585040.0 728196.0 0.0 0.0 8437160.0 16.0 1426026.0 NaN 0.0 5.0 0.0 18.0 6.0 0.0 8.0 0.0 0.0 ... 5.0oslk 5.0LM8l689qOp 5.0nan 5.0Qu4f 5.0RAYp 5.0nan 5.0zcRZptzip9 5.0RO12 5.0USOt 5.0pro8v8X 5.0CsjH_hi 5.0rUBc 5.0t_4G 5.0NhsEn4L 5.0mAjDcoz 5.0xYrN 5.0meWVy8V 5.0DQ3u3MC 5.0F2FyR07IdsN7I 0.018.0 0.06.0 0.00.0 0.08.0 0.00.0 0.00.0 0.00.0 0.00.0 0.0taul 0.01K8T 0.09_Y1 0.0VpdQ 0.0IYzP 0.0me75fM6ugJ 0.0sBgB 0.0uKAI 0.0L84s 0.0cJvF 0.0FzaX 0.0oslk 0.0LM8l689qOp
24541 938.0 520.0 148.0 185.0 2.0 96.0 186.64 749586.0 0.714591 160 77.0 2013720.0 100805.70 5.0 10.0 168123.0 48.0 32.0 32282.84 600.0 30.0 68571.0 NaN 3873950.0 2044820.0 4160.0 604800.0 10517440.0 20.0 2592000.0 NaN 7.0 0.0 0.0 18.0 6.0 0.0 0.0 0.0 18.0 ... 0.0zCkv 0.0jySVZNlOJy 0.0ELof 0.0WqMG 0.0ZI9m 0.0mj86 0.0mzKvyx8zhV 0.02Knk1KF 0.0vSNn 0.0fhk21Ss 0.0Hz673939hSRjL 0.0W9XQ 0.0QMes 0.0Ie_5MZs 0.0XTbjhEX 0.0F6F0 0.04UxGlow 0.0catzS2D 0.0TCU50_Yjmm6GIBZ0lL_ 0.018.0 0.06.0 0.00.0 0.00.0 0.00.0 0.018.0 0.00.0 0.00.0 0.0taul 0.01K8T 0.09_Y1 0.0VpdQ 0.0wMei 0.07M47J5GA0pTYIFxg5uy 0.0kIsH 0.0uKAI 0.0L84s 0.0cJvF 0.0FzaX 0.0zCkv 0.0jySVZNlOJy
31483 602.0 88.0 0.0 0.0 NaN 0.0 166.56 0.0 3.250160 30 0.0 0.0 6388.71 0.0 0.0 1194.0 NaN 0.0 81176.80 100.0 0.0 9657.0 4.0 0.0 0.0 385.0 NaN 0.0 0.0 0.0 NaN 7.0 0.0 0.0 9.0 3.0 0.0 0.0 0.0 0.0 ... 0.0oslk 0.0LM8l689qOp 0.0nan 0.0FSa2 0.0RAYp 0.0nan 0.0639qrQK2Mx 0.0RO12 0.0AHgj 0.0creg0bq 0.05q1hF23 0.06Yf9 0.015m3 0.0NhsEn4L 0.0mAjbk_S 0.0oLcf 0.0VgKv48t 0.0nRgz4Af 0.0F2FyR07IdsN7I 0.09.0 0.03.0 0.00.0 0.00.0 0.00.0 0.00.0 0.00.0 0.00.0 0.0taul 0.01K8T 0.09_Y1 0.0sJzTlal 0.0IYzP 0.0me75fM6ugJ 0.0kIsH 0.0uKAI 0.0L84s 0.0cJvF 0.0FzaX 0.0oslk 0.0LM8l689qOp

5 rows × 572 columns

In [69]:
# Reference run on ALL engineered columns (no feature selection): fit on the
# train split with every column in `cat_columns_new` treated as categorical,
# and track AUC on the validation split.
estimator_cb = CatBoostClassifier(
    task_type='GPU',
    iterations=380,
    random_state=0,
    eval_metric='AUC',
    learning_rate=0.08,
    boosting_type='Ordered',
    bootstrap_type='Bernoulli',
    subsample=0.8,
    one_hot_max_size=10,
    leaf_estimation_iterations=10,
    max_ctr_complexity=4,
)

estimator_cb.fit(
    data_train,
    labels_train,
    cat_features=cat_columns_new,
    verbose=10,
    plot=True,
    eval_set=(data_val, labels_val),
)
0:	learn: 0.5382635	test: 0.5467659	best: 0.5467659 (0)	total: 412ms	remaining: 2m 36s
10:	learn: 0.5693869	test: 0.5804114	best: 0.5804286 (7)	total: 3.42s	remaining: 1m 54s
20:	learn: 0.5804643	test: 0.5828246	best: 0.5833173 (18)	total: 6.62s	remaining: 1m 53s
30:	learn: 0.6618924	test: 0.6572264	best: 0.6572264 (30)	total: 9.88s	remaining: 1m 51s
40:	learn: 0.7029517	test: 0.6972320	best: 0.6972320 (40)	total: 13.6s	remaining: 1m 52s
50:	learn: 0.7189406	test: 0.7103220	best: 0.7103220 (50)	total: 17.3s	remaining: 1m 51s
60:	learn: 0.7278008	test: 0.7160044	best: 0.7162795 (58)	total: 21s	remaining: 1m 49s
70:	learn: 0.7362452	test: 0.7207040	best: 0.7207040 (70)	total: 24.6s	remaining: 1m 47s
80:	learn: 0.7411616	test: 0.7222482	best: 0.7227125 (76)	total: 28.2s	remaining: 1m 44s
90:	learn: 0.7453185	test: 0.7242682	best: 0.7242682 (90)	total: 31.9s	remaining: 1m 41s
100:	learn: 0.7499139	test: 0.7258927	best: 0.7258927 (100)	total: 35.6s	remaining: 1m 38s
110:	learn: 0.7521747	test: 0.7277547	best: 0.7277547 (110)	total: 39.3s	remaining: 1m 35s
120:	learn: 0.7542767	test: 0.7286820	best: 0.7287065 (119)	total: 42.9s	remaining: 1m 31s
130:	learn: 0.7565570	test: 0.7289954	best: 0.7291194 (129)	total: 46.5s	remaining: 1m 28s
140:	learn: 0.7581410	test: 0.7298802	best: 0.7298802 (140)	total: 49.8s	remaining: 1m 24s
150:	learn: 0.7594313	test: 0.7304596	best: 0.7304596 (150)	total: 53.1s	remaining: 1m 20s
160:	learn: 0.7624784	test: 0.7317170	best: 0.7322415 (158)	total: 56.5s	remaining: 1m 16s
170:	learn: 0.7639508	test: 0.7316163	best: 0.7322415 (158)	total: 59.8s	remaining: 1m 13s
180:	learn: 0.7647398	test: 0.7319501	best: 0.7322415 (158)	total: 1m 3s	remaining: 1m 9s
190:	learn: 0.7666140	test: 0.7326687	best: 0.7326687 (190)	total: 1m 6s	remaining: 1m 5s
200:	learn: 0.7692206	test: 0.7330978	best: 0.7331008 (199)	total: 1m 9s	remaining: 1m 2s
210:	learn: 0.7708567	test: 0.7338313	best: 0.7338313 (210)	total: 1m 13s	remaining: 58.7s
220:	learn: 0.7718083	test: 0.7331291	best: 0.7338313 (210)	total: 1m 16s	remaining: 55.1s
230:	learn: 0.7726004	test: 0.7328756	best: 0.7338313 (210)	total: 1m 19s	remaining: 51.5s
240:	learn: 0.7741064	test: 0.7328718	best: 0.7338313 (210)	total: 1m 23s	remaining: 48s
250:	learn: 0.7751329	test: 0.7326592	best: 0.7338313 (210)	total: 1m 26s	remaining: 44.5s
260:	learn: 0.7758482	test: 0.7326486	best: 0.7338313 (210)	total: 1m 29s	remaining: 40.9s
270:	learn: 0.7770815	test: 0.7327316	best: 0.7338313 (210)	total: 1m 33s	remaining: 37.5s
280:	learn: 0.7784941	test: 0.7323765	best: 0.7338313 (210)	total: 1m 36s	remaining: 34.1s
290:	learn: 0.7796969	test: 0.7326615	best: 0.7338313 (210)	total: 1m 40s	remaining: 30.7s
300:	learn: 0.7805603	test: 0.7327461	best: 0.7338313 (210)	total: 1m 43s	remaining: 27.2s
310:	learn: 0.7813696	test: 0.7326881	best: 0.7338313 (210)	total: 1m 47s	remaining: 23.8s
320:	learn: 0.7828450	test: 0.7324444	best: 0.7338313 (210)	total: 1m 50s	remaining: 20.3s
330:	learn: 0.7837311	test: 0.7319540	best: 0.7338313 (210)	total: 1m 53s	remaining: 16.9s
340:	learn: 0.7849347	test: 0.7322737	best: 0.7338313 (210)	total: 1m 57s	remaining: 13.4s
350:	learn: 0.7862632	test: 0.7324462	best: 0.7338313 (210)	total: 2m	remaining: 9.96s
360:	learn: 0.7866774	test: 0.7327055	best: 0.7338313 (210)	total: 2m 3s	remaining: 6.51s
370:	learn: 0.7878807	test: 0.7327363	best: 0.7338313 (210)	total: 2m 7s	remaining: 3.08s
379:	learn: 0.7884513	test: 0.7329235	best: 0.7338313 (210)	total: 2m 10s	remaining: 0us
bestTest = 0.7338312864
bestIteration = 210
Shrink model to first 211 iterations.
Out[69]:
<catboost.core.CatBoostClassifier at 0x7fe78224c358>
In [70]:
# Run the feature-selection loop with the fitted CatBoost model on the
# validation split. Judging by the printed log, a column is appended to the
# "good" lists when its diff_metric is below the threshold (0 here) --
# NOTE(review): confirm against the helper's definition.
(
    num_cols_selected,
    cat_cols_selected,
    importances,
) = feature_selection_loop_prediction(
    estimator_cb,
    data_val,
    labels_val,
    cat_columns_new,
    threshold=0,
)
----------
Var6 diff_metric: 0.0
----------
----------
Var13 diff_metric: -5.1922906884271924e-05
----------
----------
good_num_columns: ['Var13']
----------
Var21 diff_metric: 0.0
----------
----------
Var22 diff_metric: 0.0
----------
----------
Var24 diff_metric: 0.0
----------
----------
Var25 diff_metric: 0.0
----------
----------
Var28 diff_metric: 0.0
----------
----------
Var38 diff_metric: 0.0
----------
----------
Var57 diff_metric: 0.0
----------
----------
Var73 diff_metric: -0.0012246748542201757
----------
----------
good_num_columns: ['Var13', 'Var73']
----------
Var74 diff_metric: -0.0014267204957660873
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74']
----------
Var76 diff_metric: 0.0
----------
----------
Var81 diff_metric: 0.0012629869020182038
----------
----------
Var83 diff_metric: 0.0
----------
----------
Var85 diff_metric: 0.0
----------
----------
Var94 diff_metric: 0.0
----------
----------
Var109 diff_metric: 0.0
----------
----------
Var112 diff_metric: 0.0
----------
----------
Var113 diff_metric: -0.00258324023337686
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var113']
----------
Var119 diff_metric: 0.0
----------
----------
Var123 diff_metric: 0.0
----------
----------
Var125 diff_metric: 0.0
----------
----------
Var126 diff_metric: -0.04031375147438365
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var113', 'Var126']
----------
Var133 diff_metric: 2.1777374537901295e-05
----------
----------
Var134 diff_metric: 0.0
----------
----------
Var140 diff_metric: 0.0
----------
----------
Var149 diff_metric: 0.0
----------
----------
Var153 diff_metric: 0.0
----------
----------
Var160 diff_metric: 0.0
----------
----------
Var163 diff_metric: 0.00015849089246999792
----------
----------
Var189 diff_metric: -0.003076154974653078
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var113', 'Var126', 'Var189']
----------
Var7 diff_metric: -0.000223520552548373
----------
----------
good_cat_columns: ['Var7']
----------
Var35 diff_metric: 0.0
----------
----------
Var44 diff_metric: 0.0
----------
----------
Var65 diff_metric: -0.0002443905364806209
----------
----------
good_cat_columns: ['Var7', 'Var65']
----------
Var72 diff_metric: 0.0
----------
----------
Var78 diff_metric: -2.419708281964361e-06
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78']
----------
Var132 diff_metric: -2.550775813936479e-05
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132']
----------
Var143 diff_metric: 0.0
----------
----------
Var144 diff_metric: 0.0
----------
----------
Var173 diff_metric: 0.0
----------
----------
Var181 diff_metric: 0.0
----------
----------
Var195 diff_metric: 0.0
----------
----------
Var196 diff_metric: 0.0
----------
----------
Var203 diff_metric: 0.0
----------
----------
Var205 diff_metric: -0.0005179183935235532
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205']
----------
Var206 diff_metric: -0.00017704198929868742
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206']
----------
Var207 diff_metric: -0.00015939828307587334
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207']
----------
Var208 diff_metric: 0.0
----------
----------
Var210 diff_metric: -0.0014264180322307585
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210']
----------
Var211 diff_metric: 0.0
----------
----------
Var218 diff_metric: 0.0
----------
----------
Var219 diff_metric: -0.0004128627256139161
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219']
----------
Var221 diff_metric: 0.0
----------
----------
Var223 diff_metric: 8.307665101481287e-05
----------
----------
Var225 diff_metric: 0.0
----------
----------
Var226 diff_metric: 0.00025870714381559523
----------
----------
Var227 diff_metric: 0.0
----------
----------
Var229 diff_metric: 0.0
----------
----------
Var192 diff_metric: -0.0015981164990733765
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192']
----------
Var193 diff_metric: 0.00021364007706359267
----------
----------
Var197 diff_metric: 1.683713679545562e-05
----------
----------
Var198 diff_metric: 0.0005858718677758112
----------
----------
Var199 diff_metric: -0.004567300203426883
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199']
----------
Var202 diff_metric: 7.329699670843848e-05
----------
----------
Var204 diff_metric: -8.741196168671195e-05
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204']
----------
Var212 diff_metric: -0.0004672053407801524
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212']
----------
Var216 diff_metric: 3.6396445408204414e-05
----------
----------
Var217 diff_metric: -0.0005945928997088679
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217']
----------
Var220 diff_metric: -1.4518249692008212e-05
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220']
----------
Var222 diff_metric: 0.00014528331809759987
----------
----------
Var228 diff_metric: -3.054881706021639e-05
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228']
----------
Var6,Var13_mix diff_metric: 0.0
----------
----------
Var6,Var21_mix diff_metric: 0.0
----------
----------
Var6,Var22_mix diff_metric: 0.0
----------
----------
Var6,Var24_mix diff_metric: 0.0
----------
----------
Var6,Var25_mix diff_metric: 0.0
----------
----------
Var6,Var28_mix diff_metric: 0.0
----------
----------
Var6,Var38_mix diff_metric: 0.0
----------
----------
Var6,Var57_mix diff_metric: 0.0
----------
----------
Var6,Var73_mix diff_metric: 1.8450275650172543e-05
----------
----------
Var6,Var74_mix diff_metric: 0.0
----------
----------
Var6,Var76_mix diff_metric: 0.0
----------
----------
Var6,Var81_mix diff_metric: 0.0002556825084631953
----------
----------
Var6,Var83_mix diff_metric: -0.00010303924434118539
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var113', 'Var126', 'Var189', 'Var6,Var83_mix']
----------
Var6,Var85_mix diff_metric: 0.0
----------
----------
Var6,Var94_mix diff_metric: 0.0
----------
----------
Var6,Var109_mix diff_metric: 0.0
----------
----------
Var6,Var112_mix diff_metric: 0.0
----------
----------
Var6,Var113_mix diff_metric: -6.129927647691158e-05
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var113', 'Var126', 'Var189', 'Var6,Var83_mix', 'Var6,Var113_mix']
----------
Var6,Var119_mix diff_metric: 0.0
----------
----------
Var6,Var123_mix diff_metric: 0.0
----------
----------
Var6,Var125_mix diff_metric: 0.0
----------
----------
Var6,Var126_mix diff_metric: -0.00010142610548657949
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var113', 'Var126', 'Var189', 'Var6,Var83_mix', 'Var6,Var113_mix', 'Var6,Var126_mix']
----------
Var6,Var133_mix diff_metric: 0.0
----------
----------
Var6,Var134_mix diff_metric: 0.0
----------
----------
Var6,Var140_mix diff_metric: 0.0
----------
----------
Var6,Var149_mix diff_metric: 0.0
----------
----------
Var6,Var153_mix diff_metric: 0.0
----------
----------
Var6,Var160_mix diff_metric: 0.0
----------
----------
Var6,Var163_mix diff_metric: 0.0
----------
----------
Var6,Var189_mix diff_metric: 0.0
----------
----------
Var13,Var21_mix diff_metric: 0.0
----------
----------
Var13,Var22_mix diff_metric: 0.0
----------
----------
Var13,Var24_mix diff_metric: 0.0
----------
----------
Var13,Var25_mix diff_metric: 0.0
----------
----------
Var13,Var28_mix diff_metric: 5.827464112451164e-05
----------
----------
Var13,Var38_mix diff_metric: 1.5123176761999702e-06
----------
----------
Var13,Var57_mix diff_metric: 0.0
----------
----------
Var13,Var73_mix diff_metric: 0.0
----------
----------
Var13,Var74_mix diff_metric: 0.0
----------
----------
Var13,Var76_mix diff_metric: 0.0
----------
----------
Var13,Var81_mix diff_metric: 0.0
----------
----------
Var13,Var83_mix diff_metric: 0.0
----------
----------
Var13,Var85_mix diff_metric: 0.0
----------
----------
Var13,Var94_mix diff_metric: 0.0
----------
----------
Var13,Var109_mix diff_metric: -0.000651405633746438
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var113', 'Var126', 'Var189', 'Var6,Var83_mix', 'Var6,Var113_mix', 'Var6,Var126_mix', 'Var13,Var109_mix']
----------
Var13,Var112_mix diff_metric: 0.0
----------
----------
Var13,Var113_mix diff_metric: 0.0
----------
----------
Var13,Var119_mix diff_metric: 0.0
----------
----------
Var13,Var123_mix diff_metric: 9.275548414233725e-06
----------
----------
Var13,Var125_mix diff_metric: 0.0
----------
----------
Var13,Var126_mix diff_metric: -0.006432693646481136
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var113', 'Var126', 'Var189', 'Var6,Var83_mix', 'Var6,Var113_mix', 'Var6,Var126_mix', 'Var13,Var109_mix', 'Var13,Var126_mix']
----------
Var13,Var133_mix diff_metric: 0.0
----------
----------
Var13,Var134_mix diff_metric: 0.0
----------
----------
Var13,Var140_mix diff_metric: 0.0
----------
----------
Var13,Var149_mix diff_metric: 0.0
----------
----------
Var13,Var153_mix diff_metric: 0.0
----------
----------
Var13,Var160_mix diff_metric: 0.0
----------
----------
Var13,Var163_mix diff_metric: 0.0
----------
----------
Var13,Var189_mix diff_metric: -0.00013731844500275425
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var113', 'Var126', 'Var189', 'Var6,Var83_mix', 'Var6,Var113_mix', 'Var6,Var126_mix', 'Var13,Var109_mix', 'Var13,Var126_mix', 'Var13,Var189_mix']
----------
Var21,Var22_mix diff_metric: 0.0
----------
----------
Var21,Var24_mix diff_metric: 0.0
----------
----------
Var21,Var25_mix diff_metric: 0.0
----------
----------
Var21,Var28_mix diff_metric: 0.0
----------
----------
Var21,Var38_mix diff_metric: 0.0
----------
----------
Var21,Var57_mix diff_metric: 0.0
----------
----------
Var21,Var73_mix diff_metric: 7.793477091555534e-05
----------
----------
Var21,Var74_mix diff_metric: 0.0
----------
----------
Var21,Var76_mix diff_metric: 0.0
----------
----------
Var21,Var81_mix diff_metric: 0.0
----------
----------
Var21,Var83_mix diff_metric: 0.0
----------
----------
Var21,Var85_mix diff_metric: 0.0
----------
----------
Var21,Var94_mix diff_metric: 0.0
----------
----------
Var21,Var109_mix diff_metric: 0.0
----------
----------
Var21,Var112_mix diff_metric: 0.0
----------
----------
Var21,Var113_mix diff_metric: 0.0
----------
----------
Var21,Var119_mix diff_metric: 0.0
----------
----------
Var21,Var123_mix diff_metric: 0.0002490283106878488
----------
----------
Var21,Var125_mix diff_metric: 0.0
----------
----------
Var21,Var126_mix diff_metric: -0.0023384464121826154
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var113', 'Var126', 'Var189', 'Var6,Var83_mix', 'Var6,Var113_mix', 'Var6,Var126_mix', 'Var13,Var109_mix', 'Var13,Var126_mix', 'Var13,Var189_mix', 'Var21,Var126_mix']
----------
Var21,Var133_mix diff_metric: 1.794616975803187e-05
----------
----------
Var21,Var134_mix diff_metric: 0.0
----------
----------
Var21,Var140_mix diff_metric: 2.3188871035584313e-06
----------
----------
Var21,Var149_mix diff_metric: 0.0
----------
----------
Var21,Var153_mix diff_metric: 0.0
----------
----------
Var21,Var160_mix diff_metric: 0.00030649638238489096
----------
----------
Var21,Var163_mix diff_metric: 0.0
----------
----------
Var21,Var189_mix diff_metric: 0.0
----------
----------
Var22,Var24_mix diff_metric: 0.0
----------
----------
Var22,Var25_mix diff_metric: 0.0
----------
----------
Var22,Var28_mix diff_metric: 0.0
----------
----------
Var22,Var38_mix diff_metric: 0.0
----------
----------
Var22,Var57_mix diff_metric: 0.0
----------
----------
Var22,Var73_mix diff_metric: 0.00015828925011318606
----------
----------
Var22,Var74_mix diff_metric: 0.0
----------
----------
Var22,Var76_mix diff_metric: 0.0
----------
----------
Var22,Var81_mix diff_metric: 0.0
----------
----------
Var22,Var83_mix diff_metric: 0.0
----------
----------
Var22,Var85_mix diff_metric: 0.0
----------
----------
Var22,Var94_mix diff_metric: 0.0
----------
----------
Var22,Var109_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var25_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var28_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var38_mix diff_metric: -5.817381994621673e-05
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var113', 'Var126', 'Var189', 'Var6,Var83_mix', 'Var6,Var113_mix', 'Var6,Var126_mix', 'Var13,Var109_mix', 'Var13,Var126_mix', 'Var13,Var189_mix', 'Var21,Var126_mix', 'Var6,Var13,Var38_mix']
----------
Var6,Var13,Var57_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var73_mix diff_metric: 0.00036144392462167385
----------
----------
Var6,Var13,Var74_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var76_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var81_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var83_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var85_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var94_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var109_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var112_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var113_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var119_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var123_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var125_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var126_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var133_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var134_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var140_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var149_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var153_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var160_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var163_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var189_mix diff_metric: 9.658668892253974e-05
----------
----------
Var6,Var21,Var22_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var24_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var25_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var28_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var38_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var57_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var73_mix diff_metric: 4.9704840959230445e-05
----------
----------
Var6,Var21,Var74_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var76_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var81_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var83_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var85_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var94_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var109_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var112_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var113_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var119_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var123_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var125_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var126_mix diff_metric: 8.216926040904848e-05
----------
----------
Var6,Var21,Var133_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var134_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var140_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var149_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var153_mix diff_metric: -0.00025396854843007244
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var113', 'Var126', 'Var189', 'Var6,Var83_mix', 'Var6,Var113_mix', 'Var6,Var126_mix', 'Var13,Var109_mix', 'Var13,Var126_mix', 'Var13,Var189_mix', 'Var21,Var126_mix', 'Var6,Var13,Var38_mix', 'Var6,Var21,Var153_mix']
----------
Var6,Var21,Var160_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var163_mix diff_metric: 0.0
----------
----------
Var6,Var21,Var189_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var24_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var25_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var28_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var38_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var57_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var73_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var74_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var76_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var81_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var83_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var85_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var94_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var109_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var112_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var113_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var119_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var123_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var125_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var126_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var133_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var134_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var140_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var149_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var153_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var160_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var163_mix diff_metric: 0.0
----------
----------
Var6,Var22,Var189_mix diff_metric: 0.0
----------
----------
Var6,Var24,Var25_mix diff_metric: 0.0
----------
----------
Var6,Var24,Var28_mix diff_metric: -0.00025205294604024875
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var113', 'Var126', 'Var189', 'Var6,Var83_mix', 'Var6,Var113_mix', 'Var6,Var126_mix', 'Var13,Var109_mix', 'Var13,Var126_mix', 'Var13,Var189_mix', 'Var21,Var126_mix', 'Var6,Var13,Var38_mix', 'Var6,Var21,Var153_mix', 'Var6,Var24,Var28_mix']
----------
Var6,Var24,Var38_mix diff_metric: 0.0
----------
----------
Var6,Var24,Var57_mix diff_metric: 0.0
----------
----------
Var6,Var24,Var73_mix diff_metric: 0.0
----------
----------
Var6,Var24,Var74_mix diff_metric: 0.0
----------
----------
Var6,Var24,Var76_mix diff_metric: 5.938367408708789e-05
----------
----------
Var6,Var24,Var81_mix diff_metric: 0.0
----------
----------
Var6,Var24,Var83_mix diff_metric: 0.0
----------
----------
Var6,Var24,Var85_mix diff_metric: 0.0
----------
----------
Var6,Var24,Var94_mix diff_metric: -0.0001099959056519717
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var113', 'Var126', 'Var189', 'Var6,Var83_mix', 'Var6,Var113_mix', 'Var6,Var126_mix', 'Var13,Var109_mix', 'Var13,Var126_mix', 'Var13,Var189_mix', 'Var21,Var126_mix', 'Var6,Var13,Var38_mix', 'Var6,Var21,Var153_mix', 'Var6,Var24,Var28_mix', 'Var6,Var24,Var94_mix']
----------
Var6,Var24,Var109_mix diff_metric: 0.0
----------
----------
Var6,Var24,Var112_mix diff_metric: 0.0
----------
----------
Var6,Var24,Var113_mix diff_metric: 0.0
----------
----------
Var6,Var24,Var119_mix diff_metric: 0.0
----------
----------
Var6,Var24,Var123_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22_mix diff_metric: -0.00023975276227339304
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var113', 'Var126', 'Var189', 'Var6,Var83_mix', 'Var6,Var113_mix', 'Var6,Var126_mix', 'Var13,Var109_mix', 'Var13,Var126_mix', 'Var13,Var189_mix', 'Var21,Var126_mix', 'Var6,Var13,Var38_mix', 'Var6,Var21,Var153_mix', 'Var6,Var24,Var28_mix', 'Var6,Var24,Var94_mix', 'Var6,Var13,Var21,Var22_mix']
----------
Var6,Var13,Var21,Var24_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var38_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var57_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var73_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var74_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var76_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var81_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var83_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var85_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var94_mix diff_metric: 3.9925186652745026e-05
----------
----------
Var6,Var13,Var21,Var109_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var112_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var113_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var119_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var123_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var125_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var126_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var133_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var134_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var140_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var149_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var153_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var160_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var163_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var189_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var24_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var25_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var28_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var38_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var57_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var73_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var74_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var76_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var81_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var83_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var85_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var94_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var109_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var112_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var113_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var119_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var123_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var125_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var126_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var133_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var134_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var140_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var149_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var153_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var160_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var163_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var22,Var189_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var25_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var28_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var38_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var57_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var73_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var74_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var76_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var81_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var83_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var85_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var94_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var109_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var112_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var113_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var119_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var123_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var125_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var126_mix diff_metric: -0.00112365203344722
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var113', 'Var126', 'Var189', 'Var6,Var83_mix', 'Var6,Var113_mix', 'Var6,Var126_mix', 'Var13,Var109_mix', 'Var13,Var126_mix', 'Var13,Var189_mix', 'Var21,Var126_mix', 'Var6,Var13,Var38_mix', 'Var6,Var21,Var153_mix', 'Var6,Var24,Var28_mix', 'Var6,Var24,Var94_mix', 'Var6,Var13,Var21,Var22_mix', 'Var6,Var13,Var24,Var126_mix']
----------
Var6,Var13,Var24,Var133_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var134_mix diff_metric: 5.0309768029666024e-05
----------
----------
Var6,Var13,Var24,Var140_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var149_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var153_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var160_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var163_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var24,Var189_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var25,Var28_mix diff_metric: -0.000857937817731691
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var113', 'Var126', 'Var189', 'Var6,Var83_mix', 'Var6,Var113_mix', 'Var6,Var126_mix', 'Var13,Var109_mix', 'Var13,Var126_mix', 'Var13,Var189_mix', 'Var21,Var126_mix', 'Var6,Var13,Var38_mix', 'Var6,Var21,Var153_mix', 'Var6,Var24,Var28_mix', 'Var6,Var24,Var94_mix', 'Var6,Var13,Var21,Var22_mix', 'Var6,Var13,Var24,Var126_mix', 'Var6,Var13,Var25,Var28_mix']
----------
Var6,Var13,Var25,Var38_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var25,Var57_mix diff_metric: -0.0002465077812273675
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var113', 'Var126', 'Var189', 'Var6,Var83_mix', 'Var6,Var113_mix', 'Var6,Var126_mix', 'Var13,Var109_mix', 'Var13,Var126_mix', 'Var13,Var189_mix', 'Var21,Var126_mix', 'Var6,Var13,Var38_mix', 'Var6,Var21,Var153_mix', 'Var6,Var24,Var28_mix', 'Var6,Var24,Var94_mix', 'Var6,Var13,Var21,Var22_mix', 'Var6,Var13,Var24,Var126_mix', 'Var6,Var13,Var25,Var28_mix', 'Var6,Var13,Var25,Var57_mix']
----------
Var6,Var13,Var25,Var73_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var25,Var74_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var25,Var76_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var25,Var81_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var25,Var83_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var25,Var85_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var25,Var94_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var25,Var109_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var25,Var112_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var25,Var113_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var25,Var119_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var25,Var123_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var25,Var125_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var25,Var126_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var25,Var133_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var25,Var134_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var24_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var25_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var28_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var38_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var57_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var73_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var74_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var76_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var81_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var83_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var85_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var94_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var109_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var112_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var113_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var119_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var123_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var125_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var126_mix diff_metric: 7.894298269972566e-05
----------
----------
Var6,Var13,Var21,Var22,Var133_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var134_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var140_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var149_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var153_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var160_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var163_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var22,Var189_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var25_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var28_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var38_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var57_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var73_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var74_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var76_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var81_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var83_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var85_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var94_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var109_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var112_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var113_mix diff_metric: 4.345392789728564e-05
----------
----------
Var6,Var13,Var21,Var24,Var119_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var123_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var125_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var126_mix diff_metric: -0.00020597766750396485
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var113', 'Var126', 'Var189', 'Var6,Var83_mix', 'Var6,Var113_mix', 'Var6,Var126_mix', 'Var13,Var109_mix', 'Var13,Var126_mix', 'Var13,Var189_mix', 'Var21,Var126_mix', 'Var6,Var13,Var38_mix', 'Var6,Var21,Var153_mix', 'Var6,Var24,Var28_mix', 'Var6,Var24,Var94_mix', 'Var6,Var13,Var21,Var22_mix', 'Var6,Var13,Var24,Var126_mix', 'Var6,Var13,Var25,Var28_mix', 'Var6,Var13,Var25,Var57_mix', 'Var6,Var13,Var21,Var24,Var126_mix']
----------
Var6,Var13,Var21,Var24,Var133_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var134_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var140_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var149_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var153_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var160_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var163_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var24,Var189_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var28_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var38_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var57_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var73_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var74_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var76_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var81_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var83_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var85_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var94_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var109_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var112_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var113_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var119_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var123_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var125_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var126_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var133_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var134_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var140_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var149_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var153_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var160_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var163_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var25,Var189_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var38_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var57_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var73_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var74_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var76_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var81_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var83_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var85_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var94_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var109_mix diff_metric: -3.22627770931172e-05
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var113', 'Var126', 'Var189', 'Var6,Var83_mix', 'Var6,Var113_mix', 'Var6,Var126_mix', 'Var13,Var109_mix', 'Var13,Var126_mix', 'Var13,Var189_mix', 'Var21,Var126_mix', 'Var6,Var13,Var38_mix', 'Var6,Var21,Var153_mix', 'Var6,Var24,Var28_mix', 'Var6,Var24,Var94_mix', 'Var6,Var13,Var21,Var22_mix', 'Var6,Var13,Var24,Var126_mix', 'Var6,Var13,Var25,Var28_mix', 'Var6,Var13,Var25,Var57_mix', 'Var6,Var13,Var21,Var24,Var126_mix', 'Var6,Var13,Var21,Var28,Var109_mix']
----------
Var6,Var13,Var21,Var28,Var112_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var113_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var119_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var123_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var125_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var126_mix diff_metric: -0.0005363686691736147
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var113', 'Var126', 'Var189', 'Var6,Var83_mix', 'Var6,Var113_mix', 'Var6,Var126_mix', 'Var13,Var109_mix', 'Var13,Var126_mix', 'Var13,Var189_mix', 'Var21,Var126_mix', 'Var6,Var13,Var38_mix', 'Var6,Var21,Var153_mix', 'Var6,Var24,Var28_mix', 'Var6,Var24,Var94_mix', 'Var6,Var13,Var21,Var22_mix', 'Var6,Var13,Var24,Var126_mix', 'Var6,Var13,Var25,Var28_mix', 'Var6,Var13,Var25,Var57_mix', 'Var6,Var13,Var21,Var24,Var126_mix', 'Var6,Var13,Var21,Var28,Var109_mix', 'Var6,Var13,Var21,Var28,Var126_mix']
----------
Var6,Var13,Var21,Var28,Var133_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var134_mix diff_metric: -0.00015082848291059214
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var113', 'Var126', 'Var189', 'Var6,Var83_mix', 'Var6,Var113_mix', 'Var6,Var126_mix', 'Var13,Var109_mix', 'Var13,Var126_mix', 'Var13,Var189_mix', 'Var21,Var126_mix', 'Var6,Var13,Var38_mix', 'Var6,Var21,Var153_mix', 'Var6,Var24,Var28_mix', 'Var6,Var24,Var94_mix', 'Var6,Var13,Var21,Var22_mix', 'Var6,Var13,Var24,Var126_mix', 'Var6,Var13,Var25,Var28_mix', 'Var6,Var13,Var25,Var57_mix', 'Var6,Var13,Var21,Var24,Var126_mix', 'Var6,Var13,Var21,Var28,Var109_mix', 'Var6,Var13,Var21,Var28,Var126_mix', 'Var6,Var13,Var21,Var28,Var134_mix']
----------
Var6,Var13,Var21,Var28,Var140_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var149_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var153_mix diff_metric: 0.0
----------
----------
Var6,Var13,Var21,Var28,Var160_mix diff_metric: 0.0
----------
----------
Var7,Var35_mix diff_metric: 0.0
----------
----------
Var7,Var44_mix diff_metric: -2.2987228678883476e-05
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix']
----------
Var7,Var65_mix diff_metric: -0.00035801600455553917
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix']
----------
Var7,Var72_mix diff_metric: 0.00015163505233783958
----------
----------
Var7,Var78_mix diff_metric: 1.9559308612748794e-05
----------
----------
Var7,Var132_mix diff_metric: 1.4114964977940403e-06
----------
----------
Var7,Var143_mix diff_metric: 0.0002788713794988906
----------
----------
Var7,Var144_mix diff_metric: -0.00025457347550061904
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix']
----------
Var7,Var173_mix diff_metric: -8.670621343798146e-06
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix']
----------
Var7,Var181_mix diff_metric: 0.0
----------
----------
Var7,Var195_mix diff_metric: 5.978695880071161e-05
----------
----------
Var7,Var196_mix diff_metric: 5.2628655133224456e-05
----------
----------
Var7,Var203_mix diff_metric: -5.343522456058292e-05
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix']
----------
Var7,Var205_mix diff_metric: -3.44808430182697e-05
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix']
----------
Var7,Var206_mix diff_metric: 0.00010223267491382693
----------
----------
Var7,Var207_mix diff_metric: 0.0
----------
----------
Var7,Var208_mix diff_metric: 6.654197775457504e-05
----------
----------
Var7,Var210_mix diff_metric: 0.0005706478698350725
----------
----------
Var7,Var211_mix diff_metric: -0.00010223267491393795
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix']
----------
Var7,Var218_mix diff_metric: -0.0011701305966970166
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix']
----------
Var7,Var219_mix diff_metric: -0.0002108170840680046
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix']
----------
Var7,Var221_mix diff_metric: -0.0002766533135737381
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix']
----------
Var7,Var223_mix diff_metric: 0.00012542154594963328
----------
----------
Var7,Var225_mix diff_metric: -6.432391182942254e-05
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var225_mix']
----------
Var7,Var226_mix diff_metric: 0.0006956661310708601
----------
----------
Var7,Var227_mix diff_metric: -0.00011644846107050633
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var225_mix', 'Var7,Var227_mix']
----------
Var7,Var229_mix diff_metric: 0.0004888818941395368
----------
----------
Var7,Var192_mix diff_metric: -0.000121388698812952
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var225_mix', 'Var7,Var227_mix', 'Var7,Var192_mix']
----------
Var7,Var193_mix diff_metric: -0.00013096671076251454
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var225_mix', 'Var7,Var227_mix', 'Var7,Var192_mix', 'Var7,Var193_mix']
----------
Var7,Var197_mix diff_metric: 0.00022362137372677893
----------
----------
Var7,Var198_mix diff_metric: 0.00011271807746915385
----------
----------
Var7,Var199_mix diff_metric: 5.444343634453119e-05
----------
----------
Var7,Var202_mix diff_metric: -0.0007671483465678808
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var225_mix', 'Var7,Var227_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var202_mix']
----------
Var7,Var204_mix diff_metric: 0.0
----------
----------
Var7,Var212_mix diff_metric: -0.001256836810134887
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var225_mix', 'Var7,Var227_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var202_mix', 'Var7,Var212_mix']
----------
Var7,Var216_mix diff_metric: -0.0003160743943344535
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var225_mix', 'Var7,Var227_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix']
----------
Var7,Var217_mix diff_metric: -0.011478188699135528
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var225_mix', 'Var7,Var227_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix']
----------
Var7,Var220_mix diff_metric: -0.000794974991810804
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var225_mix', 'Var7,Var227_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix']
----------
Var7,Var222_mix diff_metric: 0.00035700779277136885
----------
----------
Var7,Var228_mix diff_metric: 8.40848627989832e-05
----------
----------
Var35,Var44_mix diff_metric: 0.0
----------
----------
Var35,Var65_mix diff_metric: 8.116104862510021e-05
----------
----------
Var35,Var72_mix diff_metric: 2.1374089824277576e-05
----------
----------
Var35,Var78_mix diff_metric: 0.00013066424722729675
----------
----------
Var35,Var132_mix diff_metric: -0.000134697094363867
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var225_mix', 'Var7,Var227_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var132_mix']
----------
Var35,Var143_mix diff_metric: -0.0001485095958069227
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var225_mix', 'Var7,Var227_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var132_mix', 'Var35,Var143_mix']
----------
Var35,Var144_mix diff_metric: -4.214325257800855e-05
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var225_mix', 'Var7,Var227_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix']
----------
Var35,Var173_mix diff_metric: 0.0
----------
----------
Var35,Var181_mix diff_metric: -0.00015103012526729298
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var225_mix', 'Var7,Var227_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var181_mix']
----------
Var35,Var195_mix diff_metric: 1.3106753193881104e-06
----------
----------
Var35,Var196_mix diff_metric: 1.9760950969560653e-05
----------
----------
Var35,Var203_mix diff_metric: 2.5205294602592687e-06
----------
----------
Var35,Var205_mix diff_metric: -5.716560816193539e-05
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var225_mix', 'Var7,Var227_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var181_mix', 'Var35,Var205_mix']
----------
Var35,Var206_mix diff_metric: 0.00011675092460583514
----------
----------
Var35,Var207_mix diff_metric: 1.542564029766158e-05
----------
----------
Var35,Var208_mix diff_metric: 1.623220972490902e-05
----------
----------
Var35,Var210_mix diff_metric: -0.00043726145079059364
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var225_mix', 'Var7,Var227_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var210_mix']
----------
Var35,Var211_mix diff_metric: 3.780794190588743e-05
----------
----------
Var35,Var218_mix diff_metric: -0.0003457158207887945
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var225_mix', 'Var7,Var227_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var210_mix', 'Var35,Var218_mix']
----------
Var35,Var219_mix diff_metric: -0.000121388698812952
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var225_mix', 'Var7,Var227_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix']
----------
Var35,Var221_mix diff_metric: 8.922674289812971e-05
----------
----------
Var35,Var223_mix diff_metric: 0.00039501737703417916
----------
----------
Var35,Var225_mix diff_metric: 4.688184796342032e-05
----------
----------
Var35,Var226_mix diff_metric: 0.0
----------
----------
Var35,Var227_mix diff_metric: -0.00011423039514546485
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var225_mix', 'Var7,Var227_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var227_mix']
----------
Var35,Var229_mix diff_metric: -0.0006579590103432675
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var225_mix', 'Var7,Var227_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var227_mix', 'Var35,Var229_mix']
----------
Var35,Var192_mix diff_metric: -0.002250127059890028
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var225_mix', 'Var7,Var227_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var227_mix', 'Var35,Var229_mix', 'Var35,Var192_mix']
----------
Var35,Var193_mix diff_metric: -7.037318253444447e-05
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var225_mix', 'Var7,Var227_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var227_mix', 'Var35,Var229_mix', 'Var35,Var192_mix', 'Var35,Var193_mix']
----------
Var35,Var197_mix diff_metric: -2.3188871035695335e-05
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var225_mix', 'Var7,Var227_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var227_mix', 'Var35,Var229_mix', 'Var35,Var192_mix', 'Var35,Var193_mix', 'Var35,Var197_mix']
----------
Var35,Var198_mix diff_metric: 0.00036154474579996876
----------
----------
Var35,Var199_mix diff_metric: -0.0004873695764633368
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var225_mix', 'Var7,Var227_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var227_mix', 'Var35,Var229_mix', 'Var35,Var192_mix', 'Var35,Var193_mix', 'Var35,Var197_mix', 'Var35,Var199_mix']
----------
Var35,Var202_mix diff_metric: -4.446213968156698e-05
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var225_mix', 'Var7,Var227_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var227_mix', 'Var35,Var229_mix', 'Var35,Var192_mix', 'Var35,Var193_mix', 'Var35,Var197_mix', 'Var35,Var199_mix', 'Var35,Var202_mix']
----------
Var35,Var204_mix diff_metric: -7.470849320634354e-05
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var225_mix', 'Var7,Var227_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var227_mix', 'Var35,Var229_mix', 'Var35,Var192_mix', 'Var35,Var193_mix', 'Var35,Var197_mix', 'Var35,Var199_mix', 'Var35,Var202_mix', 'Var35,Var204_mix']
----------
Var35,Var212_mix diff_metric: 9.63850465658389e-05
----------
----------
Var35,Var216_mix diff_metric: -0.00046448316896297026
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var225_mix', 'Var7,Var227_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var227_mix', 'Var35,Var229_mix', 'Var35,Var192_mix', 'Var35,Var193_mix', 'Var35,Var197_mix', 'Var35,Var199_mix', 'Var35,Var202_mix', 'Var35,Var204_mix', 'Var35,Var216_mix']
----------
Var35,Var217_mix diff_metric: 0.0006609836456957785
----------
----------
Var35,Var220_mix diff_metric: 0.0004417984038193046
----------
----------
Var35,Var222_mix diff_metric: -7.279289081651985e-05
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var225_mix', 'Var7,Var227_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var227_mix', 'Var35,Var229_mix', 'Var35,Var192_mix', 'Var35,Var193_mix', 'Var35,Var197_mix', 'Var35,Var199_mix', 'Var35,Var202_mix', 'Var35,Var204_mix', 'Var35,Var216_mix', 'Var35,Var222_mix']
----------
Var35,Var228_mix diff_metric: -0.0002652605204126468
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var225_mix', 'Var7,Var227_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var227_mix', 'Var35,Var229_mix', 'Var35,Var192_mix', 'Var35,Var193_mix', 'Var35,Var197_mix', 'Var35,Var199_mix', 'Var35,Var202_mix', 'Var35,Var204_mix', 'Var35,Var216_mix', 'Var35,Var222_mix', 'Var35,Var228_mix']
----------
Var44,Var65_mix diff_metric: -0.0006026081833928609
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var225_mix', 'Var7,Var227_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var227_mix', 'Var35,Var229_mix', 'Var35,Var192_mix', 'Var35,Var193_mix', 'Var35,Var197_mix', 'Var35,Var199_mix', 'Var35,Var202_mix', 'Var35,Var204_mix', 'Var35,Var216_mix', 'Var35,Var222_mix', 'Var35,Var228_mix', 'Var44,Var65_mix']
----------
Var44,Var72_mix diff_metric: 0.00014326689452937025
----------
----------
Var44,Var78_mix diff_metric: -5.2427012776412596e-05
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var225_mix', 'Var7,Var227_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var227_mix', 'Var35,Var229_mix', 'Var35,Var192_mix', 'Var35,Var193_mix', 'Var35,Var197_mix', 'Var35,Var199_mix', 'Var35,Var202_mix', 'Var35,Var204_mix', 'Var35,Var216_mix', 'Var35,Var222_mix', 'Var35,Var228_mix', 'Var44,Var65_mix', 'Var44,Var78_mix']
----------
Var44,Var132_mix diff_metric: 0.00028552557727445915
----------
----------
Var44,Var143_mix diff_metric: 0.0
----------
----------
Var44,Var144_mix diff_metric: -3.7404657192374735e-05
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var225_mix', 'Var7,Var227_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var227_mix', 'Var35,Var229_mix', 'Var35,Var192_mix', 'Var35,Var193_mix', 'Var35,Var197_mix', 'Var35,Var199_mix', 'Var35,Var202_mix', 'Var35,Var204_mix', 'Var35,Var216_mix', 'Var35,Var222_mix', 'Var35,Var228_mix', 'Var44,Var65_mix', 'Var44,Var78_mix', 'Var44,Var144_mix']
----------
Var44,Var173_mix diff_metric: -1.1896899053120968e-05
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var225_mix', 'Var7,Var227_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var227_mix', 'Var35,Var229_mix', 'Var35,Var192_mix', 'Var35,Var193_mix', 'Var35,Var197_mix', 'Var35,Var199_mix', 'Var35,Var202_mix', 'Var35,Var204_mix', 'Var35,Var216_mix', 'Var35,Var222_mix', 'Var35,Var228_mix', 'Var44,Var65_mix', 'Var44,Var78_mix', 'Var44,Var144_mix', 'Var44,Var173_mix']
----------
Var44,Var181_mix diff_metric: 0.0
----------
----------
Var44,Var195_mix diff_metric: 5.484672105815491e-05
----------
----------
Var44,Var196_mix diff_metric: 3.085128059532316e-05
----------
----------
Var44,Var203_mix diff_metric: -0.0004627692089298474
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var225_mix', 'Var7,Var227_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var227_mix', 'Var35,Var229_mix', 'Var35,Var192_mix', 'Var35,Var193_mix', 'Var35,Var197_mix', 'Var35,Var199_mix', 'Var35,Var202_mix', 'Var35,Var204_mix', 'Var35,Var216_mix', 'Var35,Var222_mix', 'Var35,Var228_mix', 'Var44,Var65_mix', 'Var44,Var78_mix', 'Var44,Var144_mix', 'Var44,Var173_mix', 'Var44,Var203_mix']
----------
Var44,Var205_mix diff_metric: 9.930886073994394e-05
----------
----------
Var44,Var206_mix diff_metric: -0.00019801279440923025
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var225_mix', 'Var7,Var227_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var227_mix', 'Var35,Var229_mix', 'Var35,Var192_mix', 'Var35,Var193_mix', 'Var35,Var197_mix', 'Var35,Var199_mix', 'Var35,Var202_mix', 'Var35,Var204_mix', 'Var35,Var216_mix', 'Var35,Var222_mix', 'Var35,Var228_mix', 'Var44,Var65_mix', 'Var44,Var78_mix', 'Var44,Var144_mix', 'Var44,Var173_mix', 'Var44,Var203_mix', 'Var44,Var206_mix']
----------
Var44,Var207_mix diff_metric: 0.00030800870006109093
----------
----------
Var44,Var208_mix diff_metric: 0.0
----------
----------
Var44,Var210_mix diff_metric: -0.0002776615253577974
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var225_mix', 'Var7,Var227_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var227_mix', 'Var35,Var229_mix', 'Var35,Var192_mix', 'Var35,Var193_mix', 'Var35,Var197_mix', 'Var35,Var199_mix', 'Var35,Var202_mix', 'Var35,Var204_mix', 'Var35,Var216_mix', 'Var35,Var222_mix', 'Var35,Var228_mix', 'Var44,Var65_mix', 'Var44,Var78_mix', 'Var44,Var144_mix', 'Var44,Var173_mix', 'Var44,Var203_mix', 'Var44,Var206_mix', 'Var44,Var210_mix']
----------
Var44,Var211_mix diff_metric: 0.0
----------
----------
Var44,Var218_mix diff_metric: -0.0005719585451544607
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var225_mix', 'Var7,Var227_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var227_mix', 'Var35,Var229_mix', 'Var35,Var192_mix', 'Var35,Var193_mix', 'Var35,Var197_mix', 'Var35,Var199_mix', 'Var35,Var202_mix', 'Var35,Var204_mix', 'Var35,Var216_mix', 'Var35,Var222_mix', 'Var35,Var228_mix', 'Var44,Var65_mix', 'Var44,Var78_mix', 'Var44,Var144_mix', 'Var44,Var173_mix', 'Var44,Var203_mix', 'Var44,Var206_mix', 'Var44,Var210_mix', 'Var44,Var218_mix']
----------
Var44,Var219_mix diff_metric: 0.00010747537619160141
----------
----------
Var44,Var221_mix diff_metric: 2.6213506388206298e-05
----------
----------
Var44,Var223_mix diff_metric: -0.00015223997940827516
----------
----------
good_cat_columns: ['Var7', 'Var65', 'Var78', 'Var132', 'Var205', 'Var206', 'Var207', 'Var210', 'Var219', 'Var192', 'Var199', 'Var204', 'Var212', 'Var217', 'Var220', 'Var228', 'Var7,Var44_mix', 'Var7,Var65_mix', 'Var7,Var144_mix', 'Var7,Var173_mix', 'Var7,Var203_mix', 'Var7,Var205_mix', 'Var7,Var211_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var225_mix', 'Var7,Var227_mix', 'Var7,Var192_mix', 'Var7,Var193_mix', 'Var7,Var202_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var220_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var181_mix', 'Var35,Var205_mix', 'Var35,Var210_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var227_mix', 'Var35,Var229_mix', 'Var35,Var192_mix', 'Var35,Var193_mix', 'Var35,Var197_mix', 'Var35,Var199_mix', 'Var35,Var202_mix', 'Var35,Var204_mix', 'Var35,Var216_mix', 'Var35,Var222_mix', 'Var35,Var228_mix', 'Var44,Var65_mix', 'Var44,Var78_mix', 'Var44,Var144_mix', 'Var44,Var173_mix', 'Var44,Var203_mix', 'Var44,Var206_mix', 'Var44,Var210_mix', 'Var44,Var218_mix', 'Var44,Var223_mix']

In [71]:
# Final feature list fed to the model: the selected categorical columns
# followed by the selected numeric columns.
# NOTE(review): both operands are presumably Python lists (so `+` concatenates) — confirm.
good_columns2 = cat_cols_selected + num_cols_selected
# Notebook display value: how many features were selected in total.
len(good_columns2)
Out[71]:
88
In [72]:
# Validation run: train CatBoost on the selected features (good_columns2) and
# track AUC on the hold-out split supplied as eval_set. Because an eval_set is
# given, the training log ends with the model being shrunk to its best iteration.
cb_params = dict(
    task_type='GPU',
    iterations=380,
    random_state=0,
    eval_metric='AUC',
    learning_rate=0.08,
    boosting_type='Ordered',
    bootstrap_type='Bernoulli',
    subsample=0.8,
    one_hot_max_size=10,
    leaf_estimation_iterations=10,
    max_ctr_complexity=4,
)
estimator_cb = CatBoostClassifier(**cb_params)

estimator_cb.fit(
    data_train[good_columns2],
    labels_train,
    cat_features=cat_cols_selected,
    eval_set=(data_val[good_columns2], labels_val),
    verbose=10,
    plot=True,
)

# Predicted class probabilities on the validation split; the column at index 1
# is the score used for both ranking metrics below.
prb = estimator_cb.predict_proba(data_val[good_columns2])
val_scores = prb[:, 1]

print(f'ROC AUC: {roc_auc_score(labels_val.values, val_scores)}')
print(f'PRC AUC: {average_precision_score(labels_val.values, val_scores)}')

# Hard class predictions feed the per-class precision/recall report.
pred = estimator_cb.predict(data_val[good_columns2])
print(classification_report(labels_val, pred))
print('------------------')
print('------------------')

# pr_plot is a notebook helper defined elsewhere.
# NOTE(review): the list is presumably a set of probability thresholds — confirm.
pr_plot(
    labels_val.values,
    prb,
    [0.05, 0.1, 0.2, 0.25, 0.3, 0.35, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.96, 0.97, 0.99],
)
0:	learn: 0.5717896	test: 0.5725624	best: 0.5725624 (0)	total: 220ms	remaining: 1m 23s
10:	learn: 0.5753030	test: 0.5644728	best: 0.5725624 (0)	total: 2.01s	remaining: 1m 7s
20:	learn: 0.5793207	test: 0.5788289	best: 0.5788961 (19)	total: 3.81s	remaining: 1m 5s
30:	learn: 0.6475625	test: 0.6374274	best: 0.6374274 (30)	total: 5.64s	remaining: 1m 3s
40:	learn: 0.7030683	test: 0.6928180	best: 0.6928180 (40)	total: 7.65s	remaining: 1m 3s
50:	learn: 0.7172652	test: 0.7048878	best: 0.7048878 (50)	total: 9.64s	remaining: 1m 2s
60:	learn: 0.7291780	test: 0.7155506	best: 0.7155506 (60)	total: 11.7s	remaining: 1m 1s
70:	learn: 0.7356503	test: 0.7213190	best: 0.7213190 (70)	total: 13.9s	remaining: 1m
80:	learn: 0.7409525	test: 0.7246421	best: 0.7246421 (80)	total: 15.8s	remaining: 58.4s
90:	learn: 0.7440445	test: 0.7262865	best: 0.7262865 (90)	total: 17.8s	remaining: 56.5s
100:	learn: 0.7464353	test: 0.7266480	best: 0.7267088 (99)	total: 19.8s	remaining: 54.6s
110:	learn: 0.7495614	test: 0.7280057	best: 0.7280972 (108)	total: 21.8s	remaining: 52.8s
120:	learn: 0.7509536	test: 0.7279245	best: 0.7281186 (116)	total: 23.8s	remaining: 50.8s
130:	learn: 0.7528091	test: 0.7284215	best: 0.7284631 (127)	total: 25.6s	remaining: 48.7s
140:	learn: 0.7555723	test: 0.7301956	best: 0.7301956 (140)	total: 27.6s	remaining: 46.8s
150:	learn: 0.7572440	test: 0.7303799	best: 0.7304780 (149)	total: 29.5s	remaining: 44.7s
160:	learn: 0.7581297	test: 0.7301593	best: 0.7304862 (151)	total: 31.3s	remaining: 42.6s
170:	learn: 0.7602853	test: 0.7310192	best: 0.7311736 (168)	total: 33.2s	remaining: 40.6s
180:	learn: 0.7611569	test: 0.7309087	best: 0.7312401 (176)	total: 35s	remaining: 38.5s
190:	learn: 0.7622781	test: 0.7310489	best: 0.7312401 (176)	total: 36.9s	remaining: 36.5s
200:	learn: 0.7632616	test: 0.7315536	best: 0.7315562 (195)	total: 38.8s	remaining: 34.5s
210:	learn: 0.7637947	test: 0.7319719	best: 0.7319719 (210)	total: 40.6s	remaining: 32.6s
220:	learn: 0.7639661	test: 0.7320068	best: 0.7320922 (216)	total: 42.5s	remaining: 30.5s
230:	learn: 0.7646046	test: 0.7322912	best: 0.7323565 (227)	total: 44.3s	remaining: 28.6s
240:	learn: 0.7654751	test: 0.7321889	best: 0.7323565 (227)	total: 46.2s	remaining: 26.6s
250:	learn: 0.7662124	test: 0.7322444	best: 0.7323571 (247)	total: 48s	remaining: 24.7s
260:	learn: 0.7667329	test: 0.7320134	best: 0.7323571 (247)	total: 49.8s	remaining: 22.7s
270:	learn: 0.7677801	test: 0.7321323	best: 0.7323571 (247)	total: 51.8s	remaining: 20.8s
280:	learn: 0.7683566	test: 0.7321054	best: 0.7323571 (247)	total: 53.6s	remaining: 18.9s
290:	learn: 0.7687191	test: 0.7320813	best: 0.7323571 (247)	total: 55.3s	remaining: 16.9s
300:	learn: 0.7695372	test: 0.7326266	best: 0.7327452 (296)	total: 57.4s	remaining: 15.1s
310:	learn: 0.7701387	test: 0.7328939	best: 0.7329859 (306)	total: 59.1s	remaining: 13.1s
320:	learn: 0.7729799	test: 0.7336304	best: 0.7336304 (320)	total: 1m 1s	remaining: 11.2s
330:	learn: 0.7738027	test: 0.7335543	best: 0.7337504 (323)	total: 1m 2s	remaining: 9.3s
340:	learn: 0.7745481	test: 0.7336353	best: 0.7337504 (323)	total: 1m 4s	remaining: 7.4s
350:	learn: 0.7753753	test: 0.7334651	best: 0.7337504 (323)	total: 1m 6s	remaining: 5.5s
360:	learn: 0.7756076	test: 0.7334089	best: 0.7337504 (323)	total: 1m 8s	remaining: 3.6s
370:	learn: 0.7762032	test: 0.7334447	best: 0.7337504 (323)	total: 1m 10s	remaining: 1.7s
379:	learn: 0.7770715	test: 0.7330488	best: 0.7337504 (323)	total: 1m 11s	remaining: 0us
bestTest = 0.7337504327
bestIteration = 323
Shrink model to first 324 iterations.
ROC AUC: 0.7337502221846719
PRC AUC: 0.20658835964116112
              precision    recall  f1-score   support

          -1       0.93      1.00      0.96     11107
           1       0.50      0.02      0.03       893

    accuracy                           0.93     12000
   macro avg       0.71      0.51      0.50     12000
weighted avg       0.89      0.93      0.89     12000

------------------
------------------
In [73]:
# Rebuild the engineered feature set on the full labelled data (`features`) and
# on the competition test set (`test_data`) before fitting the final model.

# Step 1: numeric feature engineering, applied to train and test independently.
# NOTE(review): presumably adds normalised interaction features of orders 2-5 over
# `num_columns`, capped by max_feats — confirm against the helper's definition.
data_plus_new_num = normed_fe_interaction_2_3_4_5(features[good_columns], max_feats = 100,num_columns=num_columns)
data_plus_new_num_test = normed_fe_interaction_2_3_4_5(test_data[good_columns], max_feats = 100,num_columns=num_columns)

# Step 2: categorical pair features (the "<A>,<B>_mix" columns shown in the output below).
# The helper returns (frame, updated categorical column list); only the list produced
# from the training frame is kept, and the test call keeps just the frame ([0]).
data_plus_num_cat,cat_columns_new = categ_fe_interaction(data_plus_new_num, max_feats = 100,cat_columns=cat_columns)
data_plus_num_cat_test = categ_fe_interaction(data_plus_new_num_test, max_feats = 100,cat_columns=cat_columns)[0]

# Step 3: final categorical preparation; train and test both use the train-derived column list.
data = cat_prep(data_plus_num_cat,cat_columns_new)
data_test = cat_prep(data_plus_num_cat_test,cat_columns_new)

# Notebook display: preview the first rows of the prepared training frame.
data.head()
Out[73]:
Var6 Var13 Var21 Var22 Var24 Var25 Var28 Var38 Var57 Var73 Var74 Var76 Var81 Var83 Var85 Var94 Var109 Var112 Var113 Var119 Var123 Var125 Var126 Var133 Var134 Var140 Var149 Var153 Var160 Var163 Var189 Var7 Var35 Var44 Var65 Var72 Var78 Var132 Var143 Var144 ... Var35,Var221_mix Var35,Var223_mix Var35,Var225_mix Var35,Var226_mix Var35,Var227_mix Var35,Var229_mix Var35,Var192_mix Var35,Var193_mix Var35,Var197_mix Var35,Var198_mix Var35,Var199_mix Var35,Var202_mix Var35,Var204_mix Var35,Var212_mix Var35,Var216_mix Var35,Var217_mix Var35,Var220_mix Var35,Var222_mix Var35,Var228_mix Var44,Var65_mix Var44,Var72_mix Var44,Var78_mix Var44,Var132_mix Var44,Var143_mix Var44,Var144_mix Var44,Var173_mix Var44,Var181_mix Var44,Var195_mix Var44,Var196_mix Var44,Var203_mix Var44,Var205_mix Var44,Var206_mix Var44,Var207_mix Var44,Var208_mix Var44,Var210_mix Var44,Var211_mix Var44,Var218_mix Var44,Var219_mix Var44,Var221_mix Var44,Var223_mix
0 3052.0 NaN 480.0 600.0 20.0 480.0 200.00 82752.0 2.907926 34 NaN 716008.0 14599.92 5.0 32.0 NaN 144.0 144.0 -1209960.0 1660.0 66.0 NaN 4.0 326915.0 604276.0 NaN 389396.0 2313888.0 28.0 599532.0 NaN nan 0.0 0.0 nan nan 0.0 0.0 0.0 9.0 ... 0.0Al6ZaUT 0.0LM8l689qOp 0.0nan 0.0fKCe 0.002N6s8f 0.0nan 0.0NESt0G8EIb 0.0AERks4l 0.00LaQ 0.0UaKK0yW 0.0I1sFbv_0IT 0.0EkHG 0.0k13i 0.0JBfYVit4g8 0.0TDctq2l 0.0KmRo 0.0hLKtJ9p 0.0vr93T2a 0.0xwM2aC7IdeMC0 0.0nan 0.0nan 0.00.0 0.00.0 0.00.0 0.09.0 0.00.0 0.00.0 0.0taul 0.01K8T 0.09_Y1 0.009_Q 0.0IYzP 0.0GjJ35utlTa_GNSvxxpb9ju 0.0kIsH 0.0uKAI 0.0L84s 0.0cJvF 0.0FzaX 0.0Al6ZaUT 0.0LM8l689qOp
1 1813.0 636.0 212.0 265.0 2.0 128.0 166.56 2706120.0 5.870327 128 0.0 1661128.0 67529.09 25.0 10.0 32289.0 80.0 72.0 417932.0 1025.0 66.0 24912.0 40.0 1934460.0 349568.0 205.0 735.0 6502680.0 14.0 364182.0 276.0 7.0 0.0 0.0 27.0 3.0 0.0 0.0 0.0 18.0 ... 0.0oslk 0.0LM8l689qOp 0.0ELof 0.0xb3V 0.0RAYp 0.0mj86 0.0P1WvyxLp3Z 0.02Knk1KF 0.0YFAj 0.0Bnunsla 0.0o64y9zI 0.0JDd6 0.0FbIm 0.0XfqtO3UdzaXh_ 0.0XTbqizz 0.0qMoY 0.0hN8KpA1 0.06hQ9lNX 0.055YFVY9 0.027.0 0.03.0 0.00.0 0.00.0 0.00.0 0.018.0 0.00.0 0.00.0 0.0taul 0.01K8T 0.09_Y1 0.0VpdQ 0.0haYg 0.0me75fM6ugJ 0.0kIsH 0.0uKAI 0.0L84s 0.0cJvF 0.0FzaX 0.0oslk 0.0LM8l689qOp
2 1953.0 448.0 176.0 220.0 0.0 72.0 311.76 4698780.0 5.981628 166 245.0 3025152.0 85266.00 35.0 0.0 53388.0 40.0 48.0 -124655.2 590.0 78.0 7218.0 36.0 3148410.0 1086210.0 400.0 0.0 10569040.0 18.0 0.0 NaN 7.0 0.0 0.0 18.0 3.0 0.0 0.0 0.0 27.0 ... 0.0zCkv 0.0LM8l689qOp 0.0nan 0.0FSa2 0.0ZI9m 0.0mj86 0.0FoxgUHSK8h 0.0LrdZy8QqgUfkVShG 0.0TyGl 0.0fhk21Ss 0.0nQUveAzAF7 0.0dnwD 0.0mTeA 0.04kVnq_T26xq1p 0.0pMWBUmQ 0.0qLXr 0.04UxGlow 0.0catzS2D 0.0ib5G6X1eUxUn6 0.018.0 0.03.0 0.00.0 0.00.0 0.00.0 0.027.0 0.00.0 0.00.0 0.0taul 0.01K8T 0.09_Y1 0.0VpdQ 0.0hAFG 0.07M47J5GA0pTYIFxg5uy 0.0kIsH 0.0uKAI 0.0L84s 0.0UYBR 0.0FzaX 0.0zCkv 0.0LM8l689qOp
3 1533.0 4.0 332.0 415.0 0.0 144.0 220.08 864384.0 5.108097 30 0.0 2642240.0 74107.20 10.0 2.0 NaN 32.0 32.0 378473.6 1435.0 24.0 693.0 NaN 7066700.0 650390.0 5.0 0.0 9676200.0 108.0 253284.0 NaN 7.0 5.0 0.0 9.0 nan 0.0 8.0 0.0 0.0 ... 5.0oslk 5.0LM8l689qOp 5.0nan 5.0xb3V 5.0RAYp 5.0nan 5.0vNEvyxLp3Z 5.0RO12 5.00Xwj 5.0uoZk2Zj 5.0LWyxgtXeJL 5.0CwmB 5.0vzJD 5.0NhsEn4L 5.0kZJtVhC 5.0JC0e 5.0ylCK5YS 5.0e4lqvY0 5.0F2FyR07IdsN7I 0.09.0 0.0nan 0.00.0 0.08.0 0.00.0 0.00.0 0.00.0 0.00.0 0.0taul 0.01K8T 0.0F3hy 0.0VpdQ 0.0IYzP 0.0me75fM6ugJ 0.0kIsH 0.0uKAI 0.0L84s 0.0cJvF 0.0FzaX 0.0oslk 0.0LM8l689qOp
4 686.0 0.0 160.0 200.0 2.0 48.0 278.00 4364880.0 0.650716 32 0.0 1440.0 171072.90 25.0 12.0 106455.0 32.0 8.0 142602.4 490.0 60.0 468.0 -28.0 3794460.0 642816.0 225.0 554414.0 10535200.0 24.0 2851284.0 NaN 7.0 0.0 0.0 9.0 3.0 0.0 0.0 0.0 9.0 ... 0.0oslk 0.0LM8l689qOp 0.0nan 0.0WqMG 0.0RAYp 0.0nan 0.04e7gUH7IEC 0.0RO12 0.0vSNn 0.0kugYdIL 0.0ZIXKpoNpqq 0.0625Z 0.0m_h1 0.0NhsEn4L 0.0NGZXfGp 0.0064o 0.0PYpzAu9 0.0MAz3HNj 0.0F2FyR07IdsN7I 0.09.0 0.03.0 0.00.0 0.00.0 0.00.0 0.09.0 0.00.0 0.00.0 0.0taul 0.01K8T 0.09_Y1 0.0sJzTlal 0.0zm5i 0.0me75fM6ugJ 0.0kIsH 0.0uKAI 0.0L84s 0.0cJvF 0.0FzaX 0.0oslk 0.0LM8l689qOp

5 rows × 572 columns

In [75]:
# Final model: same CatBoost configuration as the validation run but with 400
# iterations, fitted on the full labelled data (no eval_set is passed here).
cb_params = dict(
    task_type='GPU',
    iterations=400,
    random_state=0,
    eval_metric='AUC',
    learning_rate=0.08,
    boosting_type='Ordered',
    bootstrap_type='Bernoulli',
    subsample=0.8,
    one_hot_max_size=10,
    leaf_estimation_iterations=10,
    max_ctr_complexity=4,
)
estimator_cb = CatBoostClassifier(**cb_params)

estimator_cb.fit(
    data[good_columns2],
    labels,
    cat_features=cat_cols_selected,
    verbose=10,
    plot=True,
)

# Score the test set and write the column at index 1 of the probabilities
# to the submission file.
probs = estimator_cb.predict_proba(data_test[good_columns2])
write_to_submission_file(
    probs[:, 1],
    out_file='submission_cb_new_fe_final2_selected_fe2.csv',
)
0:	learn: 0.5737631	total: 174ms	remaining: 1m 9s
10:	learn: 0.5769892	total: 1.9s	remaining: 1m 7s
20:	learn: 0.5810241	total: 3.67s	remaining: 1m 6s
30:	learn: 0.6535859	total: 5.49s	remaining: 1m 5s
40:	learn: 0.6970335	total: 7.35s	remaining: 1m 4s
50:	learn: 0.7127644	total: 9.32s	remaining: 1m 3s
60:	learn: 0.7217209	total: 11.3s	remaining: 1m 2s
70:	learn: 0.7290931	total: 13.3s	remaining: 1m 1s
80:	learn: 0.7342558	total: 15.2s	remaining: 59.7s
90:	learn: 0.7372249	total: 17.1s	remaining: 58s
100:	learn: 0.7408932	total: 19s	remaining: 56.1s
110:	learn: 0.7432154	total: 20.9s	remaining: 54.4s
120:	learn: 0.7468387	total: 22.8s	remaining: 52.6s
130:	learn: 0.7483605	total: 24.7s	remaining: 50.7s
140:	learn: 0.7502678	total: 26.6s	remaining: 48.8s
150:	learn: 0.7525784	total: 28.4s	remaining: 46.9s
160:	learn: 0.7536437	total: 30.3s	remaining: 44.9s
170:	learn: 0.7544737	total: 32.1s	remaining: 43s
180:	learn: 0.7555454	total: 33.9s	remaining: 41s
190:	learn: 0.7563573	total: 35.5s	remaining: 38.9s
200:	learn: 0.7569517	total: 37.4s	remaining: 37s
210:	learn: 0.7588648	total: 39.4s	remaining: 35.2s
220:	learn: 0.7602527	total: 41.2s	remaining: 33.4s
230:	learn: 0.7613576	total: 43.1s	remaining: 31.5s
240:	learn: 0.7619780	total: 44.8s	remaining: 29.5s
250:	learn: 0.7624863	total: 46.6s	remaining: 27.6s
260:	learn: 0.7635337	total: 48.3s	remaining: 25.7s
270:	learn: 0.7642916	total: 50.2s	remaining: 23.9s
280:	learn: 0.7650855	total: 52s	remaining: 22s
290:	learn: 0.7659465	total: 53.8s	remaining: 20.1s
300:	learn: 0.7666561	total: 55.6s	remaining: 18.3s
310:	learn: 0.7671987	total: 57.4s	remaining: 16.4s
320:	learn: 0.7676527	total: 59.1s	remaining: 14.5s
330:	learn: 0.7682389	total: 1m	remaining: 12.7s
340:	learn: 0.7685870	total: 1m 2s	remaining: 10.8s
350:	learn: 0.7687916	total: 1m 4s	remaining: 8.98s
360:	learn: 0.7689391	total: 1m 5s	remaining: 7.12s
370:	learn: 0.7692732	total: 1m 7s	remaining: 5.28s
380:	learn: 0.7701759	total: 1m 9s	remaining: 3.46s
390:	learn: 0.7705396	total: 1m 11s	remaining: 1.64s
399:	learn: 0.7708632	total: 1m 12s	remaining: 0us
In [77]:
# Blend previously saved submissions: read each CSV (first column as index),
# place them side by side and take the plain row-wise mean as the new prediction.
sub_names = [
    'submission_cb_new_fe1',
    'submission_mean3',
    'submission_mean4',
    'submission_mean5',
    'submission_cb_new_fe6',
    'submission_mean6',
    'submission_mean7',
    'submission_mean8',
    'submission_mean10',
]
submission_list = [
    pd.read_csv(f'{submission_name}.csv', index_col=0)
    for submission_name in sub_names
]
sub_df = pd.concat(submission_list, axis=1)
# The mean is taken over all concatenated prediction columns before 'mean' exists.
sub_df['mean'] = sub_df.mean(axis=1)
write_to_submission_file(sub_df[['mean']].values, out_file='submission_mean11.csv')

image.png

In [78]:
# Second blend: the same inputs as the 'submission_mean11' blend, plus that blend
# itself and the selected-features CatBoost submission; plain row-wise mean again.
sub_names = [
    'submission_cb_new_fe1',
    'submission_mean3',
    'submission_mean4',
    'submission_mean5',
    'submission_cb_new_fe6',
    'submission_mean6',
    'submission_mean7',
    'submission_mean8',
    'submission_mean10',
    'submission_mean11',
    'submission_cb_new_fe_final2_selected_fe2',
]
submission_list = [
    pd.read_csv(f'{submission_name}.csv', index_col=0)
    for submission_name in sub_names
]
sub_df = pd.concat(submission_list, axis=1)
# The mean is taken over all concatenated prediction columns before 'mean' exists.
sub_df['mean'] = sub_df.mean(axis=1)
write_to_submission_file(sub_df[['mean']].values, out_file='submission_mean12.csv')

image.png

In [ ]:
 
In [22]:
# Indices of the significant features after preprocessing.
# params = [prop_nan, max_prop_unique, N_lim]
num_inds, cat_indices1, cat_indices2 = data_preprocessor(
    feats_train,
    params=[0.9, 0.7, 30],
)

# Translate the positional indices into column names of `features`.
num_columns = list(np.array(features.columns)[num_inds])
cat_columns = [
    *np.array(features.columns)[cat_indices1],
    *np.array(features.columns)[cat_indices2],
]

# Numeric columns first, then both groups of categorical columns.
good_columns = [*num_columns, *cat_columns]
In [23]:
# Extend the train and validation splits with generated numeric features.
# NOTE(review): `normed_fe_interaction` is defined elsewhere in the notebook;
# judging by the "VarA,VarB_mix" names in the logs it presumably adds pairwise
# (level=2) combinations of the numeric columns - confirm against its definition.
data_plus_new_num_train = normed_fe_interaction(
    feats_train[good_columns],
    level=2,
    max_feats=100,
    num_columns=num_columns,
)
data_plus_new_num_val = normed_fe_interaction(
    feats_val[good_columns],
    level=2,
    max_feats=100,
    num_columns=num_columns,
)

# Prepare the categorical columns of both splits with the shared helper.
data_train = cat_prep(data_plus_new_num_train, cat_columns)
data_val = cat_prep(data_plus_new_num_val, cat_columns)
In [24]:
# CatBoost model trained on GPU and monitored with AUC on the validation split.
estimator_cb = CatBoostClassifier(
    task_type='GPU',
    iterations=380,
    random_state=0,
    eval_metric='AUC',
    learning_rate=0.08,
    boosting_type='Ordered',
    bootstrap_type='Bernoulli',
    subsample=0.8,
    one_hot_max_size=10,
    leaf_estimation_iterations=10,
    max_ctr_complexity=4,
)

# Passing an eval_set makes CatBoost report test AUC and (as the training log
# shows) shrink the model to the best validation iteration.
estimator_cb.fit(
    data_train,
    labels_train,
    cat_features=cat_columns,
    verbose=10,
    plot=True,
    eval_set=(data_val, labels_val),
)

# Feature selection on the validation split with the fitted model.
# NOTE(review): helper is defined elsewhere; from the printed log it appears to
# keep features whose `diff_metric` falls below `threshold` - confirm.
num_cols_selected, cat_cols_selected, importances = feature_selection_loop_prediction(
    estimator_cb,
    data_val,
    labels_val,
    cat_columns,
    threshold=0,
)
good_columns2 = num_cols_selected + cat_cols_selected
0:	learn: 0.5517338	test: 0.5421557	best: 0.5421557 (0)	total: 223ms	remaining: 1m 24s
10:	learn: 0.5748235	test: 0.5826505	best: 0.5827223 (9)	total: 1.58s	remaining: 53s
20:	learn: 0.5733492	test: 0.5832736	best: 0.5843405 (13)	total: 2.88s	remaining: 49.2s
30:	learn: 0.6556641	test: 0.6529666	best: 0.6529666 (30)	total: 4.24s	remaining: 47.8s
40:	learn: 0.7134964	test: 0.7068540	best: 0.7068540 (40)	total: 5.67s	remaining: 46.9s
50:	learn: 0.7279232	test: 0.7166966	best: 0.7166966 (50)	total: 7.06s	remaining: 45.6s
60:	learn: 0.7348147	test: 0.7209927	best: 0.7209927 (60)	total: 8.49s	remaining: 44.4s
70:	learn: 0.7393932	test: 0.7228555	best: 0.7228555 (70)	total: 9.9s	remaining: 43.1s
80:	learn: 0.7427719	test: 0.7245426	best: 0.7245426 (80)	total: 11.3s	remaining: 41.8s
90:	learn: 0.7460813	test: 0.7259691	best: 0.7261606 (88)	total: 12.7s	remaining: 40.4s
100:	learn: 0.7485174	test: 0.7269494	best: 0.7272254 (99)	total: 14.2s	remaining: 39.1s
110:	learn: 0.7499498	test: 0.7280466	best: 0.7280466 (110)	total: 15.5s	remaining: 37.5s
120:	learn: 0.7521975	test: 0.7280323	best: 0.7285205 (112)	total: 16.9s	remaining: 36.1s
130:	learn: 0.7534160	test: 0.7284071	best: 0.7285205 (112)	total: 18.2s	remaining: 34.5s
140:	learn: 0.7550470	test: 0.7290383	best: 0.7290984 (138)	total: 19.5s	remaining: 33s
150:	learn: 0.7581383	test: 0.7314144	best: 0.7315207 (147)	total: 20.9s	remaining: 31.7s
160:	learn: 0.7583324	test: 0.7312614	best: 0.7315207 (147)	total: 22.1s	remaining: 30.1s
170:	learn: 0.7595901	test: 0.7316087	best: 0.7316087 (170)	total: 23.4s	remaining: 28.6s
180:	learn: 0.7611783	test: 0.7320077	best: 0.7320967 (176)	total: 24.7s	remaining: 27.2s
190:	learn: 0.7622195	test: 0.7316966	best: 0.7321116 (181)	total: 26s	remaining: 25.8s
200:	learn: 0.7635547	test: 0.7315738	best: 0.7321116 (181)	total: 27.4s	remaining: 24.4s
210:	learn: 0.7664409	test: 0.7324554	best: 0.7324835 (209)	total: 28.7s	remaining: 23s
220:	learn: 0.7677107	test: 0.7322100	best: 0.7324835 (209)	total: 30s	remaining: 21.6s
230:	learn: 0.7703412	test: 0.7337475	best: 0.7337475 (230)	total: 31.3s	remaining: 20.2s
240:	learn: 0.7712647	test: 0.7336342	best: 0.7337663 (236)	total: 32.7s	remaining: 18.9s
250:	learn: 0.7726420	test: 0.7338556	best: 0.7339884 (249)	total: 34s	remaining: 17.5s
260:	learn: 0.7737630	test: 0.7341784	best: 0.7342852 (258)	total: 35.4s	remaining: 16.1s
270:	learn: 0.7749038	test: 0.7334599	best: 0.7342852 (258)	total: 36.8s	remaining: 14.8s
280:	learn: 0.7752951	test: 0.7332950	best: 0.7342852 (258)	total: 38s	remaining: 13.4s
290:	learn: 0.7761735	test: 0.7335486	best: 0.7342852 (258)	total: 39.3s	remaining: 12s
300:	learn: 0.7773518	test: 0.7337152	best: 0.7342852 (258)	total: 40.7s	remaining: 10.7s
310:	learn: 0.7780406	test: 0.7331188	best: 0.7342852 (258)	total: 41.9s	remaining: 9.3s
320:	learn: 0.7785560	test: 0.7329290	best: 0.7342852 (258)	total: 43.2s	remaining: 7.93s
330:	learn: 0.7795579	test: 0.7332746	best: 0.7342852 (258)	total: 44.5s	remaining: 6.58s
340:	learn: 0.7808609	test: 0.7330408	best: 0.7342852 (258)	total: 45.8s	remaining: 5.23s
350:	learn: 0.7817074	test: 0.7328947	best: 0.7342852 (258)	total: 47.1s	remaining: 3.89s
360:	learn: 0.7831978	test: 0.7329111	best: 0.7342852 (258)	total: 48.3s	remaining: 2.54s
370:	learn: 0.7840177	test: 0.7331819	best: 0.7342852 (258)	total: 49.6s	remaining: 1.2s
379:	learn: 0.7849902	test: 0.7329229	best: 0.7342852 (258)	total: 50.8s	remaining: 0us
bestTest = 0.7342852354
bestIteration = 258
Shrink model to first 259 iterations.
----------
Var6 diff_metric: 0.0003037742105677088
----------
----------
Var13 diff_metric: -0.00024847379420644966
----------
----------
good_num_columns: ['Var13']
----------
Var21 diff_metric: 0.0
----------
----------
Var22 diff_metric: 0.0
----------
----------
Var24 diff_metric: 0.0
----------
----------
Var25 diff_metric: 0.0
----------
----------
Var28 diff_metric: 0.0
----------
----------
Var38 diff_metric: 0.0
----------
----------
Var57 diff_metric: 6.855840132291569e-05
----------
----------
Var73 diff_metric: -0.0034654759551068226
----------
----------
good_num_columns: ['Var13', 'Var73']
----------
Var74 diff_metric: -0.002071824805861211
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74']
----------
Var76 diff_metric: 0.0
----------
----------
Var81 diff_metric: -0.0003153182354963535
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81']
----------
Var83 diff_metric: 0.0
----------
----------
Var85 diff_metric: 0.0
----------
----------
Var94 diff_metric: 0.0
----------
----------
Var109 diff_metric: 0.0
----------
----------
Var112 diff_metric: 0.0
----------
----------
Var113 diff_metric: -0.0036424675338161405
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113']
----------
Var119 diff_metric: 0.0
----------
----------
Var123 diff_metric: 0.0
----------
----------
Var125 diff_metric: 9.043659703933393e-05
----------
----------
Var126 diff_metric: -0.05149734069018752
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126']
----------
Var133 diff_metric: 0.0
----------
----------
Var134 diff_metric: 0.0
----------
----------
Var140 diff_metric: -0.0003685014071107551
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var140']
----------
Var149 diff_metric: -7.717861207756638e-05
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var140', 'Var149']
----------
Var153 diff_metric: 0.0
----------
----------
Var160 diff_metric: 0.0
----------
----------
Var163 diff_metric: 0.00022639395613333058
----------
----------
Var189 diff_metric: -0.003543511547200784
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var140', 'Var149', 'Var189']
----------
Var7 diff_metric: -0.001985370645369522
----------
----------
good_cat_columns: ['Var7']
----------
Var35 diff_metric: -6.825593778758687e-05
----------
----------
good_cat_columns: ['Var7', 'Var35']
----------
Var44 diff_metric: 0.0
----------
----------
Var65 diff_metric: -0.00016796808324126555
----------
----------
good_cat_columns: ['Var7', 'Var35', 'Var65']
----------
Var72 diff_metric: 0.0
----------
----------
Var78 diff_metric: 0.0002611268520975596
----------
----------
Var132 diff_metric: 0.00011246602452319454
----------
----------
Var143 diff_metric: 0.0
----------
----------
Var144 diff_metric: 0.0
----------
----------
Var173 diff_metric: 0.0
----------
----------
Var181 diff_metric: 0.0
----------
----------
Var194 diff_metric: 0.0
----------
----------
Var195 diff_metric: -0.00020335631686529965
----------
----------
good_cat_columns: ['Var7', 'Var35', 'Var65', 'Var195']
----------
Var196 diff_metric: 0.0
----------
----------
Var201 diff_metric: 0.0
----------
----------
Var203 diff_metric: 0.0
----------
----------
Var205 diff_metric: -0.0017988010547105304
----------
----------
good_cat_columns: ['Var7', 'Var35', 'Var65', 'Var195', 'Var205']
----------
Var206 diff_metric: -0.002142702094287796
----------
----------
good_cat_columns: ['Var7', 'Var35', 'Var65', 'Var195', 'Var205', 'Var206']
----------
Var207 diff_metric: -0.00017805020108296876
----------
----------
good_cat_columns: ['Var7', 'Var35', 'Var65', 'Var195', 'Var205', 'Var206', 'Var207']
----------
Var208 diff_metric: 0.0
----------
----------
Var210 diff_metric: -0.001808278245481576
----------
----------
good_cat_columns: ['Var7', 'Var35', 'Var65', 'Var195', 'Var205', 'Var206', 'Var207', 'Var210']
----------
Var211 diff_metric: 0.0
----------
----------
Var218 diff_metric: -0.00038659880863645135
----------
----------
good_cat_columns: ['Var7', 'Var35', 'Var65', 'Var195', 'Var205', 'Var206', 'Var207', 'Var210', 'Var218']
----------
Var219 diff_metric: -0.00023279610096282877
----------
----------
good_cat_columns: ['Var7', 'Var35', 'Var65', 'Var195', 'Var205', 'Var206', 'Var207', 'Var210', 'Var218', 'Var219']
----------
Var221 diff_metric: 5.2023728062677854e-05
----------
----------
Var223 diff_metric: 0.0
----------
----------
Var225 diff_metric: 0.0
----------
----------
Var226 diff_metric: -0.0019101580462710377
----------
----------
good_cat_columns: ['Var7', 'Var35', 'Var65', 'Var195', 'Var205', 'Var206', 'Var207', 'Var210', 'Var218', 'Var219', 'Var226']
----------
Var227 diff_metric: 0.0
----------
----------
Var229 diff_metric: -9.401574887291098e-05
----------
----------
good_cat_columns: ['Var7', 'Var35', 'Var65', 'Var195', 'Var205', 'Var206', 'Var207', 'Var210', 'Var218', 'Var219', 'Var226', 'Var229']
----------
Var192 diff_metric: -0.004500203709190909
----------
----------
good_cat_columns: ['Var7', 'Var35', 'Var65', 'Var195', 'Var205', 'Var206', 'Var207', 'Var210', 'Var218', 'Var219', 'Var226', 'Var229', 'Var192']
----------
Var193 diff_metric: -0.0008805721722860982
----------
----------
good_cat_columns: ['Var7', 'Var35', 'Var65', 'Var195', 'Var205', 'Var206', 'Var207', 'Var210', 'Var218', 'Var219', 'Var226', 'Var229', 'Var192', 'Var193']
----------
Var197 diff_metric: 0.0002243271219759535
----------
----------
Var198 diff_metric: 0.0005407039798454027
----------
----------
Var199 diff_metric: -0.010133536642600305
----------
----------
good_cat_columns: ['Var7', 'Var35', 'Var65', 'Var195', 'Var205', 'Var206', 'Var207', 'Var210', 'Var218', 'Var219', 'Var226', 'Var229', 'Var192', 'Var193', 'Var199']
----------
Var202 diff_metric: -0.006676227202945251
----------
----------
good_cat_columns: ['Var7', 'Var35', 'Var65', 'Var195', 'Var205', 'Var206', 'Var207', 'Var210', 'Var218', 'Var219', 'Var226', 'Var229', 'Var192', 'Var193', 'Var199', 'Var202']
----------
Var204 diff_metric: -8.337911455003066e-05
----------
----------
good_cat_columns: ['Var7', 'Var35', 'Var65', 'Var195', 'Var205', 'Var206', 'Var207', 'Var210', 'Var218', 'Var219', 'Var226', 'Var229', 'Var192', 'Var193', 'Var199', 'Var202', 'Var204']
----------
Var212 diff_metric: 1.3157163783250603e-05
----------
----------
Var216 diff_metric: -0.0007470849320629913
----------
----------
good_cat_columns: ['Var7', 'Var35', 'Var65', 'Var195', 'Var205', 'Var206', 'Var207', 'Var210', 'Var218', 'Var219', 'Var226', 'Var229', 'Var192', 'Var193', 'Var199', 'Var202', 'Var204', 'Var216']
----------
Var217 diff_metric: -0.029666883801877875
----------
----------
good_cat_columns: ['Var7', 'Var35', 'Var65', 'Var195', 'Var205', 'Var206', 'Var207', 'Var210', 'Var218', 'Var219', 'Var226', 'Var229', 'Var192', 'Var193', 'Var199', 'Var202', 'Var204', 'Var216', 'Var217']
----------
Var220 diff_metric: -0.00017623541987132896
----------
----------
good_cat_columns: ['Var7', 'Var35', 'Var65', 'Var195', 'Var205', 'Var206', 'Var207', 'Var210', 'Var218', 'Var219', 'Var226', 'Var229', 'Var192', 'Var193', 'Var199', 'Var202', 'Var204', 'Var216', 'Var217', 'Var220']
----------
Var222 diff_metric: -0.00018500686239353303
----------
----------
good_cat_columns: ['Var7', 'Var35', 'Var65', 'Var195', 'Var205', 'Var206', 'Var207', 'Var210', 'Var218', 'Var219', 'Var226', 'Var229', 'Var192', 'Var193', 'Var199', 'Var202', 'Var204', 'Var216', 'Var217', 'Var220', 'Var222']
----------
Var228 diff_metric: -0.001744004744241301
----------
----------
good_cat_columns: ['Var7', 'Var35', 'Var65', 'Var195', 'Var205', 'Var206', 'Var207', 'Var210', 'Var218', 'Var219', 'Var226', 'Var229', 'Var192', 'Var193', 'Var199', 'Var202', 'Var204', 'Var216', 'Var217', 'Var220', 'Var222', 'Var228']
----------
Var6,Var13_mix diff_metric: 5.918203173027603e-05
----------
----------
Var6,Var21_mix diff_metric: 0.0
----------
----------
Var6,Var22_mix diff_metric: 0.0
----------
----------
Var6,Var24_mix diff_metric: 0.0
----------
----------
Var6,Var25_mix diff_metric: 0.0
----------
----------
Var6,Var28_mix diff_metric: -0.0002947507150994344
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var140', 'Var149', 'Var189', 'Var6,Var28_mix']
----------
Var6,Var38_mix diff_metric: -0.0005454425752310366
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var140', 'Var149', 'Var189', 'Var6,Var28_mix', 'Var6,Var38_mix']
----------
Var6,Var57_mix diff_metric: 0.0
----------
----------
Var6,Var73_mix diff_metric: 0.0005936855091031035
----------
----------
Var6,Var74_mix diff_metric: 0.0
----------
----------
Var6,Var76_mix diff_metric: 0.0
----------
----------
Var6,Var81_mix diff_metric: 3.72030148356739e-05
----------
----------
Var6,Var83_mix diff_metric: 0.00021122036878173933
----------
----------
Var6,Var85_mix diff_metric: 0.0
----------
----------
Var6,Var94_mix diff_metric: 0.0
----------
----------
Var6,Var109_mix diff_metric: 0.0
----------
----------
Var6,Var112_mix diff_metric: 0.0
----------
----------
Var6,Var113_mix diff_metric: 4.032847136237194e-07
----------
----------
Var6,Var119_mix diff_metric: 0.0
----------
----------
Var6,Var123_mix diff_metric: 0.0
----------
----------
Var6,Var125_mix diff_metric: 0.0
----------
----------
Var6,Var126_mix diff_metric: -0.00022311726783474928
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var140', 'Var149', 'Var189', 'Var6,Var28_mix', 'Var6,Var38_mix', 'Var6,Var126_mix']
----------
Var6,Var133_mix diff_metric: 0.0
----------
----------
Var6,Var134_mix diff_metric: -4.496624557348561e-05
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var140', 'Var149', 'Var189', 'Var6,Var28_mix', 'Var6,Var38_mix', 'Var6,Var126_mix', 'Var6,Var134_mix']
----------
Var6,Var140_mix diff_metric: 0.0
----------
----------
Var6,Var149_mix diff_metric: 6.422309065101661e-05
----------
----------
Var6,Var153_mix diff_metric: 0.0
----------
----------
Var6,Var160_mix diff_metric: -0.00012259855295393418
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var140', 'Var149', 'Var189', 'Var6,Var28_mix', 'Var6,Var38_mix', 'Var6,Var126_mix', 'Var6,Var134_mix', 'Var6,Var160_mix']
----------
Var6,Var163_mix diff_metric: 0.0
----------
----------
Var6,Var189_mix diff_metric: 5.187249629501345e-05
----------
----------
Var13,Var21_mix diff_metric: 0.0
----------
----------
Var13,Var22_mix diff_metric: 0.0
----------
----------
Var13,Var24_mix diff_metric: 0.0
----------
----------
Var13,Var25_mix diff_metric: -7.16838578539436e-05
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var140', 'Var149', 'Var189', 'Var6,Var28_mix', 'Var6,Var38_mix', 'Var6,Var126_mix', 'Var6,Var134_mix', 'Var6,Var160_mix', 'Var13,Var25_mix']
----------
Var13,Var28_mix diff_metric: 1.8551096828689495e-05
----------
----------
Var13,Var38_mix diff_metric: -0.0001345962731855721
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var140', 'Var149', 'Var189', 'Var6,Var28_mix', 'Var6,Var38_mix', 'Var6,Var126_mix', 'Var6,Var134_mix', 'Var6,Var160_mix', 'Var13,Var25_mix', 'Var13,Var38_mix']
----------
Var13,Var57_mix diff_metric: 0.0
----------
----------
Var13,Var73_mix diff_metric: -1.6635494439753984e-06
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var140', 'Var149', 'Var189', 'Var6,Var28_mix', 'Var6,Var38_mix', 'Var6,Var126_mix', 'Var6,Var134_mix', 'Var6,Var160_mix', 'Var13,Var25_mix', 'Var13,Var38_mix', 'Var13,Var73_mix']
----------
Var13,Var74_mix diff_metric: 0.0
----------
----------
Var13,Var76_mix diff_metric: 0.0
----------
----------
Var13,Var81_mix diff_metric: 0.0
----------
----------
Var13,Var83_mix diff_metric: 0.0
----------
----------
Var13,Var85_mix diff_metric: 0.0
----------
----------
Var13,Var94_mix diff_metric: 0.0
----------
----------
Var13,Var109_mix diff_metric: -0.00016348154080170207
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var140', 'Var149', 'Var189', 'Var6,Var28_mix', 'Var6,Var38_mix', 'Var6,Var126_mix', 'Var6,Var134_mix', 'Var6,Var160_mix', 'Var13,Var25_mix', 'Var13,Var38_mix', 'Var13,Var73_mix', 'Var13,Var109_mix']
----------
Var13,Var112_mix diff_metric: -0.00016292702432041395
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var140', 'Var149', 'Var189', 'Var6,Var28_mix', 'Var6,Var38_mix', 'Var6,Var126_mix', 'Var6,Var134_mix', 'Var6,Var160_mix', 'Var13,Var25_mix', 'Var13,Var38_mix', 'Var13,Var73_mix', 'Var13,Var109_mix', 'Var13,Var112_mix']
----------
Var13,Var113_mix diff_metric: 0.0
----------
----------
Var13,Var119_mix diff_metric: 0.0
----------
----------
Var13,Var123_mix diff_metric: 0.0
----------
----------
Var13,Var125_mix diff_metric: -0.00026611750042926374
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var140', 'Var149', 'Var189', 'Var6,Var28_mix', 'Var6,Var38_mix', 'Var6,Var126_mix', 'Var6,Var134_mix', 'Var6,Var160_mix', 'Var13,Var25_mix', 'Var13,Var38_mix', 'Var13,Var73_mix', 'Var13,Var109_mix', 'Var13,Var112_mix', 'Var13,Var125_mix']
----------
Var13,Var126_mix diff_metric: -0.010955380478458898
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var140', 'Var149', 'Var189', 'Var6,Var28_mix', 'Var6,Var38_mix', 'Var6,Var126_mix', 'Var6,Var134_mix', 'Var6,Var160_mix', 'Var13,Var25_mix', 'Var13,Var38_mix', 'Var13,Var73_mix', 'Var13,Var109_mix', 'Var13,Var112_mix', 'Var13,Var125_mix', 'Var13,Var126_mix']
----------
Var13,Var133_mix diff_metric: 0.0
----------
----------
Var13,Var134_mix diff_metric: 0.0
----------
----------
Var13,Var140_mix diff_metric: 0.0
----------
----------
Var13,Var149_mix diff_metric: 0.0
----------
----------
Var13,Var153_mix diff_metric: -2.520529459593135e-07
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var140', 'Var149', 'Var189', 'Var6,Var28_mix', 'Var6,Var38_mix', 'Var6,Var126_mix', 'Var6,Var134_mix', 'Var6,Var160_mix', 'Var13,Var25_mix', 'Var13,Var38_mix', 'Var13,Var73_mix', 'Var13,Var109_mix', 'Var13,Var112_mix', 'Var13,Var125_mix', 'Var13,Var126_mix', 'Var13,Var153_mix']
----------
Var13,Var160_mix diff_metric: 0.0
----------
----------
Var13,Var163_mix diff_metric: 0.0002592112497077359
----------
----------
Var13,Var189_mix diff_metric: -0.0014653350070994442
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var140', 'Var149', 'Var189', 'Var6,Var28_mix', 'Var6,Var38_mix', 'Var6,Var126_mix', 'Var6,Var134_mix', 'Var6,Var160_mix', 'Var13,Var25_mix', 'Var13,Var38_mix', 'Var13,Var73_mix', 'Var13,Var109_mix', 'Var13,Var112_mix', 'Var13,Var125_mix', 'Var13,Var126_mix', 'Var13,Var153_mix', 'Var13,Var189_mix']
----------
Var21,Var22_mix diff_metric: 0.0
----------
----------
Var21,Var24_mix diff_metric: 0.0
----------
----------
Var21,Var25_mix diff_metric: 0.0
----------
----------
Var21,Var28_mix diff_metric: 0.0
----------
----------
Var21,Var38_mix diff_metric: 0.0
----------
----------
Var21,Var57_mix diff_metric: 0.0
----------
----------
Var21,Var73_mix diff_metric: -4.103421961532128e-05
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var140', 'Var149', 'Var189', 'Var6,Var28_mix', 'Var6,Var38_mix', 'Var6,Var126_mix', 'Var6,Var134_mix', 'Var6,Var160_mix', 'Var13,Var25_mix', 'Var13,Var38_mix', 'Var13,Var73_mix', 'Var13,Var109_mix', 'Var13,Var112_mix', 'Var13,Var125_mix', 'Var13,Var126_mix', 'Var13,Var153_mix', 'Var13,Var189_mix', 'Var21,Var73_mix']
----------
Var21,Var74_mix diff_metric: 0.0
----------
----------
Var21,Var76_mix diff_metric: -7.158303667531563e-05
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var140', 'Var149', 'Var189', 'Var6,Var28_mix', 'Var6,Var38_mix', 'Var6,Var126_mix', 'Var6,Var134_mix', 'Var6,Var160_mix', 'Var13,Var25_mix', 'Var13,Var38_mix', 'Var13,Var73_mix', 'Var13,Var109_mix', 'Var13,Var112_mix', 'Var13,Var125_mix', 'Var13,Var126_mix', 'Var13,Var153_mix', 'Var13,Var189_mix', 'Var21,Var73_mix', 'Var21,Var76_mix']
----------
Var21,Var81_mix diff_metric: 0.0
----------
----------
Var21,Var83_mix diff_metric: -2.6969665226306283e-05
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var140', 'Var149', 'Var189', 'Var6,Var28_mix', 'Var6,Var38_mix', 'Var6,Var126_mix', 'Var6,Var134_mix', 'Var6,Var160_mix', 'Var13,Var25_mix', 'Var13,Var38_mix', 'Var13,Var73_mix', 'Var13,Var109_mix', 'Var13,Var112_mix', 'Var13,Var125_mix', 'Var13,Var126_mix', 'Var13,Var153_mix', 'Var13,Var189_mix', 'Var21,Var73_mix', 'Var21,Var76_mix', 'Var21,Var83_mix']
----------
Var21,Var85_mix diff_metric: 0.0
----------
----------
Var21,Var94_mix diff_metric: 0.0
----------
----------
Var21,Var109_mix diff_metric: -5.6308628145429473e-05
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var140', 'Var149', 'Var189', 'Var6,Var28_mix', 'Var6,Var38_mix', 'Var6,Var126_mix', 'Var6,Var134_mix', 'Var6,Var160_mix', 'Var13,Var25_mix', 'Var13,Var38_mix', 'Var13,Var73_mix', 'Var13,Var109_mix', 'Var13,Var112_mix', 'Var13,Var125_mix', 'Var13,Var126_mix', 'Var13,Var153_mix', 'Var13,Var189_mix', 'Var21,Var73_mix', 'Var21,Var76_mix', 'Var21,Var83_mix', 'Var21,Var109_mix']
----------
Var21,Var112_mix diff_metric: 0.0
----------
----------
Var21,Var113_mix diff_metric: 0.0
----------
----------
Var21,Var119_mix diff_metric: 0.0
----------
----------
Var21,Var123_mix diff_metric: 0.0
----------
----------
Var21,Var125_mix diff_metric: 0.0
----------
----------
Var21,Var126_mix diff_metric: -0.0019276505207262984
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var140', 'Var149', 'Var189', 'Var6,Var28_mix', 'Var6,Var38_mix', 'Var6,Var126_mix', 'Var6,Var134_mix', 'Var6,Var160_mix', 'Var13,Var25_mix', 'Var13,Var38_mix', 'Var13,Var73_mix', 'Var13,Var109_mix', 'Var13,Var112_mix', 'Var13,Var125_mix', 'Var13,Var126_mix', 'Var13,Var153_mix', 'Var13,Var189_mix', 'Var21,Var73_mix', 'Var21,Var76_mix', 'Var21,Var83_mix', 'Var21,Var109_mix', 'Var21,Var126_mix']
----------
Var21,Var133_mix diff_metric: -0.00042173498931463715
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var140', 'Var149', 'Var189', 'Var6,Var28_mix', 'Var6,Var38_mix', 'Var6,Var126_mix', 'Var6,Var134_mix', 'Var6,Var160_mix', 'Var13,Var25_mix', 'Var13,Var38_mix', 'Var13,Var73_mix', 'Var13,Var109_mix', 'Var13,Var112_mix', 'Var13,Var125_mix', 'Var13,Var126_mix', 'Var13,Var153_mix', 'Var13,Var189_mix', 'Var21,Var73_mix', 'Var21,Var76_mix', 'Var21,Var83_mix', 'Var21,Var109_mix', 'Var21,Var126_mix', 'Var21,Var133_mix']
----------
Var21,Var134_mix diff_metric: -4.536953028710933e-06
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var140', 'Var149', 'Var189', 'Var6,Var28_mix', 'Var6,Var38_mix', 'Var6,Var126_mix', 'Var6,Var134_mix', 'Var6,Var160_mix', 'Var13,Var25_mix', 'Var13,Var38_mix', 'Var13,Var73_mix', 'Var13,Var109_mix', 'Var13,Var112_mix', 'Var13,Var125_mix', 'Var13,Var126_mix', 'Var13,Var153_mix', 'Var13,Var189_mix', 'Var21,Var73_mix', 'Var21,Var76_mix', 'Var21,Var83_mix', 'Var21,Var109_mix', 'Var21,Var126_mix', 'Var21,Var133_mix', 'Var21,Var134_mix']
----------
Var21,Var140_mix diff_metric: 0.0
----------
----------
Var21,Var149_mix diff_metric: 0.0
----------
----------
Var21,Var153_mix diff_metric: 0.0
----------
----------
Var21,Var160_mix diff_metric: 0.00026863802988974506
----------
----------
Var21,Var163_mix diff_metric: 0.0
----------
----------
Var21,Var189_mix diff_metric: -7.521259909837319e-05
----------
----------
good_num_columns: ['Var13', 'Var73', 'Var74', 'Var81', 'Var113', 'Var126', 'Var140', 'Var149', 'Var189', 'Var6,Var28_mix', 'Var6,Var38_mix', 'Var6,Var126_mix', 'Var6,Var134_mix', 'Var6,Var160_mix', 'Var13,Var25_mix', 'Var13,Var38_mix', 'Var13,Var73_mix', 'Var13,Var109_mix', 'Var13,Var112_mix', 'Var13,Var125_mix', 'Var13,Var126_mix', 'Var13,Var153_mix', 'Var13,Var189_mix', 'Var21,Var73_mix', 'Var21,Var76_mix', 'Var21,Var83_mix', 'Var21,Var109_mix', 'Var21,Var126_mix', 'Var21,Var133_mix', 'Var21,Var134_mix', 'Var21,Var189_mix']
----------
Var22,Var24_mix diff_metric: 0.0
----------
----------
Var22,Var25_mix diff_metric: 0.0
----------
----------
Var22,Var28_mix diff_metric: 0.00012481661887908668
----------
----------
Var22,Var38_mix diff_metric: 0.0
----------
----------
Var22,Var57_mix diff_metric: 0.0
----------
----------
Var22,Var73_mix diff_metric: 0.0
----------
----------
Var22,Var74_mix diff_metric: 0.0
----------
----------
Var22,Var76_mix diff_metric: 0.0
----------
----------
Var22,Var81_mix diff_metric: 0.0
----------
----------
Var22,Var83_mix diff_metric: 0.0
----------
----------
Var22,Var85_mix diff_metric: 3.735424660311626e-05
----------
----------
Var22,Var94_mix diff_metric: 0.0
----------
----------
Var22,Var109_mix diff_metric: 0.0
----------

In [25]:
# Apply the same feature generation to the full labelled set and the test set.
# NOTE(review): each dataset is passed through `normed_fe_interaction`
# independently; if the helper normalises with per-dataset statistics the
# train/test features may be scaled differently - confirm against its definition.
data_plus_new_num = normed_fe_interaction(
    features[good_columns],
    level=2,
    max_feats=100,
    num_columns=num_columns,
)
data_plus_new_num_test = normed_fe_interaction(
    test_data[good_columns],
    level=2,
    max_feats=100,
    num_columns=num_columns,
)

# Prepare the categorical columns of both datasets with the shared helper.
data = cat_prep(data_plus_new_num, cat_columns)
data_test = cat_prep(data_plus_new_num_test, cat_columns)
In [26]:
# Final model: same hyper-parameters as the validation run, but fitted on the
# whole labelled set restricted to the selected columns (no eval_set here, so
# all 380 iterations are used).
estimator_cb = CatBoostClassifier(
    task_type='GPU',
    iterations=380,
    random_state=0,
    eval_metric='AUC',
    learning_rate=0.08,
    boosting_type='Ordered',
    bootstrap_type='Bernoulli',
    subsample=0.8,
    one_hot_max_size=10,
    leaf_estimation_iterations=10,
    max_ctr_complexity=4,
)

estimator_cb.fit(
    data[good_columns2],
    labels,
    cat_features=cat_cols_selected,
    verbose=10,
    plot=True,
)

# Column 1 of predict_proba is written out as the submission score
# (presumably the positive/churn class - confirm label encoding).
probs = estimator_cb.predict_proba(data_test[good_columns2])
write_to_submission_file(
    probs[:, 1],
    out_file='submission_cb_new_fe_final3_selected_fe3.csv',
)
0:	learn: 0.5671884	total: 121ms	remaining: 45.9s
10:	learn: 0.5727115	total: 1.13s	remaining: 37.8s
20:	learn: 0.5797753	total: 2.06s	remaining: 35.3s
30:	learn: 0.6449493	total: 3.06s	remaining: 34.4s
40:	learn: 0.7064194	total: 4.17s	remaining: 34.5s
50:	learn: 0.7177590	total: 5.32s	remaining: 34.3s
60:	learn: 0.7274616	total: 6.46s	remaining: 33.8s
70:	learn: 0.7353428	total: 7.59s	remaining: 33s
80:	learn: 0.7403195	total: 8.68s	remaining: 32s
90:	learn: 0.7431551	total: 9.74s	remaining: 30.9s
100:	learn: 0.7475255	total: 10.8s	remaining: 29.9s
110:	learn: 0.7497698	total: 12s	remaining: 29.1s
120:	learn: 0.7525703	total: 13.1s	remaining: 28s
130:	learn: 0.7533948	total: 14.2s	remaining: 26.9s
140:	learn: 0.7548017	total: 15.3s	remaining: 25.9s
150:	learn: 0.7562038	total: 16.4s	remaining: 24.9s
160:	learn: 0.7571903	total: 17.4s	remaining: 23.7s
170:	learn: 0.7583186	total: 18.5s	remaining: 22.6s
180:	learn: 0.7599198	total: 19.6s	remaining: 21.5s
190:	learn: 0.7608023	total: 20.6s	remaining: 20.4s
200:	learn: 0.7611438	total: 21.7s	remaining: 19.3s
210:	learn: 0.7616245	total: 22.7s	remaining: 18.2s
220:	learn: 0.7624743	total: 23.7s	remaining: 17.1s
230:	learn: 0.7635298	total: 24.8s	remaining: 16s
240:	learn: 0.7635802	total: 25.8s	remaining: 14.9s
250:	learn: 0.7640935	total: 26.8s	remaining: 13.8s
260:	learn: 0.7647226	total: 27.9s	remaining: 12.7s
270:	learn: 0.7649325	total: 28.8s	remaining: 11.6s
280:	learn: 0.7656254	total: 29.9s	remaining: 10.5s
290:	learn: 0.7657983	total: 30.8s	remaining: 9.43s
300:	learn: 0.7657962	total: 31.7s	remaining: 8.32s
310:	learn: 0.7662036	total: 32.7s	remaining: 7.25s
320:	learn: 0.7664236	total: 33.6s	remaining: 6.17s
330:	learn: 0.7666903	total: 34.5s	remaining: 5.11s
340:	learn: 0.7675592	total: 35.5s	remaining: 4.06s
350:	learn: 0.7677030	total: 36.5s	remaining: 3.02s
360:	learn: 0.7681322	total: 37.6s	remaining: 1.98s
370:	learn: 0.7686514	total: 38.6s	remaining: 936ms
379:	learn: 0.7688996	total: 39.5s	remaining: 0us
In [ ]:
 
In [27]:
# Step 1: extend the train and validation splits with generated numeric features.
data_plus_new_num_train = normed_fe_interaction(
    feats_train[good_columns],
    level=2,
    max_feats=100,
    num_columns=num_columns,
)
data_plus_new_num_val = normed_fe_interaction(
    feats_val[good_columns],
    level=2,
    max_feats=100,
    num_columns=num_columns,
)

# Step 2: add generated categorical features. The helper returns a pair whose
# second element (the updated list of categorical column names) is taken from
# the train call only; for validation just the first element is kept.
data_plus_num_cat_train, cat_columns_new = categ_fe_interaction(
    data_plus_new_num_train,
    max_feats=100,
    cat_columns=cat_columns,
)
data_plus_num_cat_val = categ_fe_interaction(
    data_plus_new_num_val,
    max_feats=100,
    cat_columns=cat_columns,
)[0]

# Step 3: prepare the (extended) categorical columns of both splits.
data_train = cat_prep(data_plus_num_cat_train, cat_columns_new)
data_val = cat_prep(data_plus_num_cat_val, cat_columns_new)
In [28]:
# CatBoost model trained on GPU and monitored with AUC on the validation split;
# this run uses the feature set extended with categorical interactions.
estimator_cb = CatBoostClassifier(
    task_type='GPU',
    iterations=380,
    random_state=0,
    eval_metric='AUC',
    learning_rate=0.08,
    boosting_type='Ordered',
    bootstrap_type='Bernoulli',
    subsample=0.8,
    one_hot_max_size=10,
    leaf_estimation_iterations=10,
    max_ctr_complexity=4,
)

estimator_cb.fit(
    data_train,
    labels_train,
    cat_features=cat_columns_new,
    verbose=10,
    plot=True,
    eval_set=(data_val, labels_val),
)

# Feature selection on the validation split with the fitted model.
# NOTE(review): helper is defined elsewhere in the notebook; its exact
# selection rule for `threshold=0` should be confirmed there.
num_cols_selected, cat_cols_selected, importances = feature_selection_loop_prediction(
    estimator_cb,
    data_val,
    labels_val,
    cat_columns_new,
    threshold=0,
)
good_columns2 = num_cols_selected + cat_cols_selected
0:	learn: 0.5567222	test: 0.5651166	best: 0.5651166 (0)	total: 382ms	remaining: 2m 24s
10:	learn: 0.5801440	test: 0.5830351	best: 0.5836101 (7)	total: 3.23s	remaining: 1m 48s
20:	learn: 0.5819111	test: 0.5845946	best: 0.5872339 (13)	total: 5.97s	remaining: 1m 42s
30:	learn: 0.6657282	test: 0.6600390	best: 0.6600390 (30)	total: 9.05s	remaining: 1m 41s
40:	learn: 0.7013708	test: 0.6972329	best: 0.6972329 (40)	total: 12.3s	remaining: 1m 41s
50:	learn: 0.7185171	test: 0.7104258	best: 0.7104258 (50)	total: 15.7s	remaining: 1m 41s
60:	learn: 0.7278197	test: 0.7151800	best: 0.7151800 (60)	total: 19.1s	remaining: 1m 39s
70:	learn: 0.7319253	test: 0.7156581	best: 0.7158191 (68)	total: 22.4s	remaining: 1m 37s
80:	learn: 0.7377417	test: 0.7197298	best: 0.7197298 (80)	total: 25.6s	remaining: 1m 34s
90:	learn: 0.7428199	test: 0.7225719	best: 0.7225719 (90)	total: 29s	remaining: 1m 32s
100:	learn: 0.7465822	test: 0.7258849	best: 0.7258849 (100)	total: 32.1s	remaining: 1m 28s
110:	learn: 0.7487278	test: 0.7256358	best: 0.7258849 (100)	total: 35.2s	remaining: 1m 25s
120:	learn: 0.7507529	test: 0.7262572	best: 0.7262572 (120)	total: 38.5s	remaining: 1m 22s
130:	learn: 0.7550052	test: 0.7281746	best: 0.7281746 (130)	total: 41.9s	remaining: 1m 19s
140:	learn: 0.7558659	test: 0.7281433	best: 0.7283661 (131)	total: 44.9s	remaining: 1m 16s
150:	learn: 0.7580246	test: 0.7297882	best: 0.7297882 (150)	total: 47.9s	remaining: 1m 12s
160:	learn: 0.7590281	test: 0.7293726	best: 0.7300267 (155)	total: 50.9s	remaining: 1m 9s
170:	learn: 0.7605188	test: 0.7290104	best: 0.7300267 (155)	total: 54.1s	remaining: 1m 6s
180:	learn: 0.7618663	test: 0.7288310	best: 0.7300267 (155)	total: 57.3s	remaining: 1m 2s
190:	learn: 0.7635201	test: 0.7293168	best: 0.7300267 (155)	total: 1m	remaining: 59.7s
200:	learn: 0.7648082	test: 0.7292397	best: 0.7300267 (155)	total: 1m 3s	remaining: 56.5s
210:	learn: 0.7654866	test: 0.7293651	best: 0.7300267 (155)	total: 1m 6s	remaining: 53.3s
220:	learn: 0.7664793	test: 0.7286377	best: 0.7300267 (155)	total: 1m 9s	remaining: 50.1s
230:	learn: 0.7676792	test: 0.7289226	best: 0.7300267 (155)	total: 1m 12s	remaining: 46.8s
240:	learn: 0.7686311	test: 0.7287342	best: 0.7300267 (155)	total: 1m 15s	remaining: 43.8s
250:	learn: 0.7694915	test: 0.7290083	best: 0.7300267 (155)	total: 1m 18s	remaining: 40.6s
260:	learn: 0.7702132	test: 0.7292442	best: 0.7300267 (155)	total: 1m 21s	remaining: 37.4s
270:	learn: 0.7709432	test: 0.7291700	best: 0.7300267 (155)	total: 1m 24s	remaining: 34.1s
280:	learn: 0.7716403	test: 0.7292854	best: 0.7300267 (155)	total: 1m 27s	remaining: 31s
290:	learn: 0.7721972	test: 0.7294517	best: 0.7300267 (155)	total: 1m 30s	remaining: 27.8s
300:	learn: 0.7737574	test: 0.7294076	best: 0.7300267 (155)	total: 1m 34s	remaining: 24.7s
310:	learn: 0.7757953	test: 0.7306234	best: 0.7306234 (310)	total: 1m 37s	remaining: 21.5s
320:	learn: 0.7778657	test: 0.7307599	best: 0.7308579 (319)	total: 1m 40s	remaining: 18.4s
330:	learn: 0.7793507	test: 0.7310590	best: 0.7310590 (330)	total: 1m 43s	remaining: 15.3s
340:	learn: 0.7804024	test: 0.7309676	best: 0.7310590 (330)	total: 1m 46s	remaining: 12.2s
350:	learn: 0.7812406	test: 0.7311090	best: 0.7312096 (347)	total: 1m 49s	remaining: 9.05s
360:	learn: 0.7815987	test: 0.7309465	best: 0.7312096 (347)	total: 1m 52s	remaining: 5.93s
370:	learn: 0.7834525	test: 0.7309616	best: 0.7312635 (365)	total: 1m 55s	remaining: 2.81s
379:	learn: 0.7841870	test: 0.7311837	best: 0.7312635 (365)	total: 1m 58s	remaining: 0us
bestTest = 0.7312634587
bestIteration = 365
Shrink model to first 366 iterations.
----------
Var6 diff_metric: 0.0
----------
----------
Var13 diff_metric: 0.0
----------
----------
Var21 diff_metric: -0.00017300914216200614
----------
----------
good_num_columns: ['Var21']
----------
Var22 diff_metric: 0.0
----------
----------
Var24 diff_metric: 0.0
----------
----------
Var25 diff_metric: -8.40848627989832e-05
----------
----------
good_num_columns: ['Var21', 'Var25']
----------
Var28 diff_metric: 0.00028562639845264304
----------
----------
Var38 diff_metric: 0.0
----------
----------
Var57 diff_metric: -0.00022140330780173745
----------
----------
good_num_columns: ['Var21', 'Var25', 'Var57']
----------
Var73 diff_metric: -0.0014902378381680403
----------
----------
good_num_columns: ['Var21', 'Var25', 'Var57', 'Var73']
----------
Var74 diff_metric: -0.001339812639971405
----------
----------
good_num_columns: ['Var21', 'Var25', 'Var57', 'Var73', 'Var74']
----------
Var76 diff_metric: 0.0
----------
----------
Var81 diff_metric: -0.0007267190540231061
----------
----------
good_num_columns: ['Var21', 'Var25', 'Var57', 'Var73', 'Var74', 'Var81']
----------
Var83 diff_metric: 0.0
----------
----------
Var85 diff_metric: 0.0
----------
----------
Var94 diff_metric: 0.0
----------
----------
Var109 diff_metric: 0.0
----------
----------
Var112 diff_metric: 0.0
----------
----------
Var113 diff_metric: -0.004649671106192743
----------
----------
good_num_columns: ['Var21', 'Var25', 'Var57', 'Var73', 'Var74', 'Var81', 'Var113']
----------
Var119 diff_metric: 0.0
----------
----------
Var123 diff_metric: 0.0
----------
----------
Var125 diff_metric: -0.000131773280189873
----------
----------
good_num_columns: ['Var21', 'Var25', 'Var57', 'Var73', 'Var74', 'Var81', 'Var113', 'Var125']
----------
Var126 diff_metric: -0.04384551735429898
----------
----------
good_num_columns: ['Var21', 'Var25', 'Var57', 'Var73', 'Var74', 'Var81', 'Var113', 'Var125', 'Var126']
----------
Var133 diff_metric: -0.0011645854318841353
----------
----------
good_num_columns: ['Var21', 'Var25', 'Var57', 'Var73', 'Var74', 'Var81', 'Var113', 'Var125', 'Var126', 'Var133']
----------
Var134 diff_metric: 0.00010354335023332606
----------
----------
Var140 diff_metric: 0.0
----------
----------
Var149 diff_metric: 0.0
----------
----------
Var153 diff_metric: 0.0
----------
----------
Var160 diff_metric: 0.0
----------
----------
Var163 diff_metric: 0.00040610770665994167
----------
----------
Var189 diff_metric: -0.004954755992079951
----------
----------
good_num_columns: ['Var21', 'Var25', 'Var57', 'Var73', 'Var74', 'Var81', 'Var113', 'Var125', 'Var126', 'Var133', 'Var189']
----------
Var7 diff_metric: 0.0
----------
----------
Var35 diff_metric: -1.592974618969123e-05
----------
----------
good_cat_columns: ['Var35']
----------
Var44 diff_metric: -4.0328471373474173e-07
----------
----------
good_cat_columns: ['Var35', 'Var44']
----------
Var65 diff_metric: 6.12992764770226e-05
----------
----------
Var72 diff_metric: 0.0
----------
----------
Var78 diff_metric: 8.700867697308823e-05
----------
----------
Var132 diff_metric: -6.876004367972754e-05
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132']
----------
Var143 diff_metric: 0.0
----------
----------
Var144 diff_metric: 0.0
----------
----------
Var173 diff_metric: 0.0
----------
----------
Var181 diff_metric: 0.0
----------
----------
Var194 diff_metric: 0.0
----------
----------
Var195 diff_metric: -7.964873094845615e-06
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195']
----------
Var196 diff_metric: 0.0
----------
----------
Var201 diff_metric: 0.0
----------
----------
Var203 diff_metric: 0.0
----------
----------
Var205 diff_metric: -0.0002241254796189196
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205']
----------
Var206 diff_metric: 9.567929831688637e-05
----------
----------
Var207 diff_metric: -1.1997720231415876e-05
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207']
----------
Var208 diff_metric: 0.0
----------
----------
Var210 diff_metric: -0.0014500101879800775
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210']
----------
Var211 diff_metric: 0.0
----------
----------
Var218 diff_metric: 0.00028592886198797185
----------
----------
Var219 diff_metric: 0.00020325549568689372
----------
----------
Var221 diff_metric: 0.0
----------
----------
Var223 diff_metric: 0.0
----------
----------
Var225 diff_metric: 0.0
----------
----------
Var226 diff_metric: 0.0002179753877357138
----------
----------
Var227 diff_metric: -5.041058920296493e-07
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227']
----------
Var229 diff_metric: 0.0
----------
----------
Var192 diff_metric: -0.001532179448389126
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192']
----------
Var193 diff_metric: -0.00041992020810288633
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193']
----------
Var197 diff_metric: -0.0001792600552237289
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197']
----------
Var198 diff_metric: 2.0869983932136904e-05
----------
----------
Var199 diff_metric: -0.005523185795989871
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199']
----------
Var202 diff_metric: -0.00035005113146069355
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202']
----------
Var204 diff_metric: 0.00021091790524641052
----------
----------
Var212 diff_metric: 4.526870910881442e-05
----------
----------
Var216 diff_metric: -0.00043131300126397765
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216']
----------
Var217 diff_metric: -0.00047214557852248706
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217']
----------
Var220 diff_metric: 8.146351216020697e-05
----------
----------
Var222 diff_metric: -2.4802009890412258e-05
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222']
----------
Var228 diff_metric: -1.592974618969123e-05
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228']
----------
Var6,Var13_mix diff_metric: 0.0
----------
----------
Var6,Var21_mix diff_metric: 0.0
----------
----------
Var6,Var22_mix diff_metric: 0.0
----------
----------
Var6,Var24_mix diff_metric: -6.744936836022841e-05
----------
----------
good_num_columns: ['Var21', 'Var25', 'Var57', 'Var73', 'Var74', 'Var81', 'Var113', 'Var125', 'Var126', 'Var133', 'Var189', 'Var6,Var24_mix']
----------
Var6,Var25_mix diff_metric: 0.0
----------
----------
Var6,Var28_mix diff_metric: -9.930886073983292e-05
----------
----------
good_num_columns: ['Var21', 'Var25', 'Var57', 'Var73', 'Var74', 'Var81', 'Var113', 'Var125', 'Var126', 'Var133', 'Var189', 'Var6,Var24_mix', 'Var6,Var28_mix']
----------
Var6,Var38_mix diff_metric: 0.0
----------
----------
Var6,Var57_mix diff_metric: 0.0
----------
----------
Var6,Var73_mix diff_metric: 0.0005307226831822165
----------
----------
Var6,Var74_mix diff_metric: 0.0
----------
----------
Var6,Var76_mix diff_metric: 0.0
----------
----------
Var6,Var81_mix diff_metric: 0.0001330839555091501
----------
----------
Var6,Var83_mix diff_metric: 0.0009338057544897582
----------
----------
Var6,Var85_mix diff_metric: 0.0
----------
----------
Var6,Var94_mix diff_metric: 0.0
----------
----------
Var6,Var109_mix diff_metric: 0.0
----------
----------
Var6,Var112_mix diff_metric: 0.0
----------
----------
Var6,Var113_mix diff_metric: 0.0
----------
----------
Var6,Var119_mix diff_metric: 0.0
----------
----------
Var6,Var123_mix diff_metric: 0.0
----------
----------
Var6,Var125_mix diff_metric: 0.0
----------
----------
Var6,Var126_mix diff_metric: -6.422309065101661e-05
----------
----------
good_num_columns: ['Var21', 'Var25', 'Var57', 'Var73', 'Var74', 'Var81', 'Var113', 'Var125', 'Var126', 'Var133', 'Var189', 'Var6,Var24_mix', 'Var6,Var28_mix', 'Var6,Var126_mix']
----------
Var6,Var133_mix diff_metric: 0.0
----------
----------
Var6,Var134_mix diff_metric: 0.0
----------
----------
Var6,Var140_mix diff_metric: 0.0
----------
----------
Var6,Var149_mix diff_metric: 1.592974618969123e-05
----------
----------
Var6,Var153_mix diff_metric: 0.0
----------
----------
Var6,Var160_mix diff_metric: 0.0
----------
----------
Var6,Var163_mix diff_metric: 0.0
----------
----------
Var6,Var189_mix diff_metric: 0.0
----------
----------
Var13,Var21_mix diff_metric: 0.0
----------
----------
Var13,Var22_mix diff_metric: 0.0
----------
----------
Var13,Var24_mix diff_metric: 0.0
----------
----------
Var13,Var25_mix diff_metric: -0.00031284811662501966
----------
----------
good_num_columns: ['Var21', 'Var25', 'Var57', 'Var73', 'Var74', 'Var81', 'Var113', 'Var125', 'Var126', 'Var133', 'Var189', 'Var6,Var24_mix', 'Var6,Var28_mix', 'Var6,Var126_mix', 'Var13,Var25_mix']
----------
Var13,Var28_mix diff_metric: -0.00010596305851540144
----------
----------
good_num_columns: ['Var21', 'Var25', 'Var57', 'Var73', 'Var74', 'Var81', 'Var113', 'Var125', 'Var126', 'Var133', 'Var189', 'Var6,Var24_mix', 'Var6,Var28_mix', 'Var6,Var126_mix', 'Var13,Var25_mix', 'Var13,Var28_mix']
----------
Var13,Var38_mix diff_metric: -3.4581664196786654e-05
----------
----------
good_num_columns: ['Var21', 'Var25', 'Var57', 'Var73', 'Var74', 'Var81', 'Var113', 'Var125', 'Var126', 'Var133', 'Var189', 'Var6,Var24_mix', 'Var6,Var28_mix', 'Var6,Var126_mix', 'Var13,Var25_mix', 'Var13,Var28_mix', 'Var13,Var38_mix']
----------
Var13,Var57_mix diff_metric: 0.0
----------
----------
Var13,Var73_mix diff_metric: -0.0002870378949506591
----------
----------
good_num_columns: ['Var21', 'Var25', 'Var57', 'Var73', 'Var74', 'Var81', 'Var113', 'Var125', 'Var126', 'Var133', 'Var189', 'Var6,Var24_mix', 'Var6,Var28_mix', 'Var6,Var126_mix', 'Var13,Var25_mix', 'Var13,Var28_mix', 'Var13,Var38_mix', 'Var13,Var73_mix']
----------
Var13,Var74_mix diff_metric: -0.0004601478582910712
----------
----------
good_num_columns: ['Var21', 'Var25', 'Var57', 'Var73', 'Var74', 'Var81', 'Var113', 'Var125', 'Var126', 'Var133', 'Var189', 'Var6,Var24_mix', 'Var6,Var28_mix', 'Var6,Var126_mix', 'Var13,Var25_mix', 'Var13,Var28_mix', 'Var13,Var38_mix', 'Var13,Var73_mix', 'Var13,Var74_mix']
----------
Var13,Var76_mix diff_metric: 0.0
----------
----------
Var13,Var81_mix diff_metric: 0.0
----------
----------
Var13,Var83_mix diff_metric: 0.0
----------
----------
Var13,Var85_mix diff_metric: 0.0
----------
----------
Var13,Var94_mix diff_metric: 0.0
----------
----------
Var13,Var109_mix diff_metric: -0.0005178175723449252
----------
----------
good_num_columns: ['Var21', 'Var25', 'Var57', 'Var73', 'Var74', 'Var81', 'Var113', 'Var125', 'Var126', 'Var133', 'Var189', 'Var6,Var24_mix', 'Var6,Var28_mix', 'Var6,Var126_mix', 'Var13,Var25_mix', 'Var13,Var28_mix', 'Var13,Var38_mix', 'Var13,Var73_mix', 'Var13,Var74_mix', 'Var13,Var109_mix']
----------
Var13,Var112_mix diff_metric: 3.165785002268162e-05
----------
----------
Var13,Var113_mix diff_metric: -9.063823939614579e-05
----------
----------
good_num_columns: ['Var21', 'Var25', 'Var57', 'Var73', 'Var74', 'Var81', 'Var113', 'Var125', 'Var126', 'Var133', 'Var189', 'Var6,Var24_mix', 'Var6,Var28_mix', 'Var6,Var126_mix', 'Var13,Var25_mix', 'Var13,Var28_mix', 'Var13,Var38_mix', 'Var13,Var73_mix', 'Var13,Var74_mix', 'Var13,Var109_mix', 'Var13,Var113_mix']
----------
Var13,Var119_mix diff_metric: 0.0
----------
----------
Var13,Var123_mix diff_metric: 0.0
----------
----------
Var13,Var125_mix diff_metric: 0.0003761638166704939
----------
----------
Var13,Var126_mix diff_metric: -0.01039657909708791
----------
----------
good_num_columns: ['Var21', 'Var25', 'Var57', 'Var73', 'Var74', 'Var81', 'Var113', 'Var125', 'Var126', 'Var133', 'Var189', 'Var6,Var24_mix', 'Var6,Var28_mix', 'Var6,Var126_mix', 'Var13,Var25_mix', 'Var13,Var28_mix', 'Var13,Var38_mix', 'Var13,Var73_mix', 'Var13,Var74_mix', 'Var13,Var109_mix', 'Var13,Var113_mix', 'Var13,Var126_mix']
----------
Var13,Var133_mix diff_metric: 0.0
----------
----------
Var13,Var134_mix diff_metric: 0.0
----------
----------
Var13,Var140_mix diff_metric: 9.063823939603477e-05
----------
----------
Var13,Var149_mix diff_metric: 0.0
----------
----------
Var13,Var153_mix diff_metric: 0.0
----------
----------
Var13,Var160_mix diff_metric: 0.0
----------
----------
Var13,Var163_mix diff_metric: 2.6717612280124925e-05
----------
----------
Var13,Var189_mix diff_metric: -0.001317430338362957
----------
----------
good_num_columns: ['Var21', 'Var25', 'Var57', 'Var73', 'Var74', 'Var81', 'Var113', 'Var125', 'Var126', 'Var133', 'Var189', 'Var6,Var24_mix', 'Var6,Var28_mix', 'Var6,Var126_mix', 'Var13,Var25_mix', 'Var13,Var28_mix', 'Var13,Var38_mix', 'Var13,Var73_mix', 'Var13,Var74_mix', 'Var13,Var109_mix', 'Var13,Var113_mix', 'Var13,Var126_mix', 'Var13,Var189_mix']
----------
Var21,Var22_mix diff_metric: 0.0
----------
----------
Var21,Var24_mix diff_metric: 0.0
----------
----------
Var21,Var25_mix diff_metric: 0.0
----------
----------
Var21,Var28_mix diff_metric: 0.0
----------
----------
Var21,Var38_mix diff_metric: -6.694526246842081e-05
----------
----------
good_num_columns: ['Var21', 'Var25', 'Var57', 'Var73', 'Var74', 'Var81', 'Var113', 'Var125', 'Var126', 'Var133', 'Var189', 'Var6,Var24_mix', 'Var6,Var28_mix', 'Var6,Var126_mix', 'Var13,Var25_mix', 'Var13,Var28_mix', 'Var13,Var38_mix', 'Var13,Var73_mix', 'Var13,Var74_mix', 'Var13,Var109_mix', 'Var13,Var113_mix', 'Var13,Var126_mix', 'Var13,Var189_mix', 'Var21,Var38_mix']
----------
Var21,Var57_mix diff_metric: 0.0
----------
----------
Var21,Var73_mix diff_metric: 0.0
----------
----------
Var21,Var74_mix diff_metric: 0.0
----------
----------
Var21,Var76_mix diff_metric: -1.1291971982463345e-05
----------
----------
good_num_columns: ['Var21', 'Var25', 'Var57', 'Var73', 'Var74', 'Var81', 'Var113', 'Var125', 'Var126', 'Var133', 'Var189', 'Var6,Var24_mix', 'Var6,Var28_mix', 'Var6,Var126_mix', 'Var13,Var25_mix', 'Var13,Var28_mix', 'Var13,Var38_mix', 'Var13,Var73_mix', 'Var13,Var74_mix', 'Var13,Var109_mix', 'Var13,Var113_mix', 'Var13,Var126_mix', 'Var13,Var189_mix', 'Var21,Var38_mix', 'Var21,Var76_mix']
----------
Var21,Var81_mix diff_metric: 0.0
----------
----------
Var21,Var83_mix diff_metric: 0.0
----------
----------
Var21,Var85_mix diff_metric: 0.0
----------
----------
Var21,Var94_mix diff_metric: 0.0
----------
----------
Var21,Var109_mix diff_metric: 0.0
----------
----------
Var21,Var112_mix diff_metric: 0.0
----------
----------
Var21,Var113_mix diff_metric: 0.00039400916525011986
----------
----------
Var21,Var119_mix diff_metric: 0.0
----------
----------
Var21,Var123_mix diff_metric: 0.0
----------
----------
Var21,Var125_mix diff_metric: 0.0
----------
----------
Var21,Var126_mix diff_metric: -0.0014755179461193313
----------
----------
good_num_columns: ['Var21', 'Var25', 'Var57', 'Var73', 'Var74', 'Var81', 'Var113', 'Var125', 'Var126', 'Var133', 'Var189', 'Var6,Var24_mix', 'Var6,Var28_mix', 'Var6,Var126_mix', 'Var13,Var25_mix', 'Var13,Var28_mix', 'Var13,Var38_mix', 'Var13,Var73_mix', 'Var13,Var74_mix', 'Var13,Var109_mix', 'Var13,Var113_mix', 'Var13,Var126_mix', 'Var13,Var189_mix', 'Var21,Var38_mix', 'Var21,Var76_mix', 'Var21,Var126_mix']
----------
Var21,Var133_mix diff_metric: -0.00016282620314200802
----------
----------
good_num_columns: ['Var21', 'Var25', 'Var57', 'Var73', 'Var74', 'Var81', 'Var113', 'Var125', 'Var126', 'Var133', 'Var189', 'Var6,Var24_mix', 'Var6,Var28_mix', 'Var6,Var126_mix', 'Var13,Var25_mix', 'Var13,Var28_mix', 'Var13,Var38_mix', 'Var13,Var73_mix', 'Var13,Var74_mix', 'Var13,Var109_mix', 'Var13,Var113_mix', 'Var13,Var126_mix', 'Var13,Var189_mix', 'Var21,Var38_mix', 'Var21,Var76_mix', 'Var21,Var126_mix', 'Var21,Var133_mix']
----------
Var21,Var134_mix diff_metric: -4.9301556245495703e-05
----------
----------
good_num_columns: ['Var21', 'Var25', 'Var57', 'Var73', 'Var74', 'Var81', 'Var113', 'Var125', 'Var126', 'Var133', 'Var189', 'Var6,Var24_mix', 'Var6,Var28_mix', 'Var6,Var126_mix', 'Var13,Var25_mix', 'Var13,Var28_mix', 'Var13,Var38_mix', 'Var13,Var73_mix', 'Var13,Var74_mix', 'Var13,Var109_mix', 'Var13,Var113_mix', 'Var13,Var126_mix', 'Var13,Var189_mix', 'Var21,Var38_mix', 'Var21,Var76_mix', 'Var21,Var126_mix', 'Var21,Var133_mix', 'Var21,Var134_mix']
----------
Var21,Var140_mix diff_metric: 0.0
----------
----------
Var21,Var149_mix diff_metric: -0.00048242933872100213
----------
----------
good_num_columns: ['Var21', 'Var25', 'Var57', 'Var73', 'Var74', 'Var81', 'Var113', 'Var125', 'Var126', 'Var133', 'Var189', 'Var6,Var24_mix', 'Var6,Var28_mix', 'Var6,Var126_mix', 'Var13,Var25_mix', 'Var13,Var28_mix', 'Var13,Var38_mix', 'Var13,Var73_mix', 'Var13,Var74_mix', 'Var13,Var109_mix', 'Var13,Var113_mix', 'Var13,Var126_mix', 'Var13,Var189_mix', 'Var21,Var38_mix', 'Var21,Var76_mix', 'Var21,Var126_mix', 'Var21,Var133_mix', 'Var21,Var134_mix', 'Var21,Var149_mix']
----------
Var21,Var153_mix diff_metric: 0.0
----------
----------
Var21,Var160_mix diff_metric: -3.054881706010537e-05
----------
----------
good_num_columns: ['Var21', 'Var25', 'Var57', 'Var73', 'Var74', 'Var81', 'Var113', 'Var125', 'Var126', 'Var133', 'Var189', 'Var6,Var24_mix', 'Var6,Var28_mix', 'Var6,Var126_mix', 'Var13,Var25_mix', 'Var13,Var28_mix', 'Var13,Var38_mix', 'Var13,Var73_mix', 'Var13,Var74_mix', 'Var13,Var109_mix', 'Var13,Var113_mix', 'Var13,Var126_mix', 'Var13,Var189_mix', 'Var21,Var38_mix', 'Var21,Var76_mix', 'Var21,Var126_mix', 'Var21,Var133_mix', 'Var21,Var134_mix', 'Var21,Var149_mix', 'Var21,Var160_mix']
----------
Var21,Var163_mix diff_metric: -9.850229131258548e-05
----------
----------
good_num_columns: ['Var21', 'Var25', 'Var57', 'Var73', 'Var74', 'Var81', 'Var113', 'Var125', 'Var126', 'Var133', 'Var189', 'Var6,Var24_mix', 'Var6,Var28_mix', 'Var6,Var126_mix', 'Var13,Var25_mix', 'Var13,Var28_mix', 'Var13,Var38_mix', 'Var13,Var73_mix', 'Var13,Var74_mix', 'Var13,Var109_mix', 'Var13,Var113_mix', 'Var13,Var126_mix', 'Var13,Var189_mix', 'Var21,Var38_mix', 'Var21,Var76_mix', 'Var21,Var126_mix', 'Var21,Var133_mix', 'Var21,Var134_mix', 'Var21,Var149_mix', 'Var21,Var160_mix', 'Var21,Var163_mix']
----------
Var21,Var189_mix diff_metric: 0.0
----------
----------
Var22,Var24_mix diff_metric: 0.0
----------
----------
Var22,Var25_mix diff_metric: 0.0
----------
----------
Var22,Var28_mix diff_metric: 0.0
----------
----------
Var22,Var38_mix diff_metric: 0.0
----------
----------
Var22,Var57_mix diff_metric: -0.00021706799712983837
----------
----------
good_num_columns: ['Var21', 'Var25', 'Var57', 'Var73', 'Var74', 'Var81', 'Var113', 'Var125', 'Var126', 'Var133', 'Var189', 'Var6,Var24_mix', 'Var6,Var28_mix', 'Var6,Var126_mix', 'Var13,Var25_mix', 'Var13,Var28_mix', 'Var13,Var38_mix', 'Var13,Var73_mix', 'Var13,Var74_mix', 'Var13,Var109_mix', 'Var13,Var113_mix', 'Var13,Var126_mix', 'Var13,Var189_mix', 'Var21,Var38_mix', 'Var21,Var76_mix', 'Var21,Var126_mix', 'Var21,Var133_mix', 'Var21,Var134_mix', 'Var21,Var149_mix', 'Var21,Var160_mix', 'Var21,Var163_mix', 'Var22,Var57_mix']
----------
Var22,Var73_mix diff_metric: 0.0004361524178280174
----------
----------
Var22,Var74_mix diff_metric: 0.0
----------
----------
Var22,Var76_mix diff_metric: 0.0
----------
----------
Var22,Var81_mix diff_metric: 0.0
----------
----------
Var22,Var83_mix diff_metric: 0.0
----------
----------
Var22,Var85_mix diff_metric: 0.0
----------
----------
Var22,Var94_mix diff_metric: 0.0
----------
----------
Var22,Var109_mix diff_metric: 0.0
----------
----------
Var7,Var35_mix diff_metric: -0.0001265305789120985
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix']
----------
Var7,Var44_mix diff_metric: -1.1493614339386227e-05
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix']
----------
Var7,Var65_mix diff_metric: 7.753148620193162e-05
----------
----------
Var7,Var72_mix diff_metric: 7.188550021064444e-05
----------
----------
Var7,Var78_mix diff_metric: 3.438002183986377e-05
----------
----------
Var7,Var132_mix diff_metric: -6.73485471819335e-05
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix']
----------
Var7,Var143_mix diff_metric: 2.6213506388206298e-05
----------
----------
Var7,Var144_mix diff_metric: -2.84315723133588e-05
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix']
----------
Var7,Var173_mix diff_metric: 0.0
----------
----------
Var7,Var181_mix diff_metric: -1.2300183766744688e-05
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix']
----------
Var7,Var194_mix diff_metric: 8.680703461627637e-05
----------
----------
Var7,Var195_mix diff_metric: 3.3674273591022263e-05
----------
----------
Var7,Var196_mix diff_metric: -0.0001403430803551542
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix']
----------
Var7,Var201_mix diff_metric: -7.904380387813159e-05
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix']
----------
Var7,Var203_mix diff_metric: 2.9338962919012168e-05
----------
----------
Var7,Var205_mix diff_metric: -7.763230738033755e-05
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix']
----------
Var7,Var206_mix diff_metric: -0.00011554107046474194
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix']
----------
Var7,Var207_mix diff_metric: 0.00017583213515770524
----------
----------
Var7,Var208_mix diff_metric: -0.00010243431727074981
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix']
----------
Var7,Var210_mix diff_metric: 1.7139600330118299e-06
----------
----------
Var7,Var211_mix diff_metric: 0.00013983897446323557
----------
----------
Var7,Var218_mix diff_metric: -0.0008529471693999868
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix']
----------
Var7,Var219_mix diff_metric: -0.0004830342657914377
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix']
----------
Var7,Var221_mix diff_metric: -0.00034984948910377067
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix']
----------
Var7,Var223_mix diff_metric: 8.620210754595181e-05
----------
----------
Var7,Var225_mix diff_metric: 0.0005529033424338525
----------
----------
Var7,Var226_mix diff_metric: 0.00014709809930912865
----------
----------
Var7,Var227_mix diff_metric: 2.5810221674471556e-05
----------
----------
Var7,Var229_mix diff_metric: -0.0003378517688722438
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var229_mix']
----------
Var7,Var192_mix diff_metric: 8.579882283199503e-05
----------
----------
Var7,Var193_mix diff_metric: -5.615739637787609e-05
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var229_mix', 'Var7,Var193_mix']
----------
Var7,Var197_mix diff_metric: -0.00036567841411505597
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var229_mix', 'Var7,Var193_mix', 'Var7,Var197_mix']
----------
Var7,Var198_mix diff_metric: 0.00027070486404723315
----------
----------
Var7,Var199_mix diff_metric: 3.6598087765016274e-05
----------
----------
Var7,Var202_mix diff_metric: 0.0005214471347679828
----------
----------
Var7,Var204_mix diff_metric: -2.621350638887243e-06
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var229_mix', 'Var7,Var193_mix', 'Var7,Var197_mix', 'Var7,Var204_mix']
----------
Var7,Var212_mix diff_metric: -0.0032882827340404708
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var229_mix', 'Var7,Var193_mix', 'Var7,Var197_mix', 'Var7,Var204_mix', 'Var7,Var212_mix']
----------
Var7,Var216_mix diff_metric: -0.0006225707767192334
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var229_mix', 'Var7,Var193_mix', 'Var7,Var197_mix', 'Var7,Var204_mix', 'Var7,Var212_mix', 'Var7,Var216_mix']
----------
Var7,Var217_mix diff_metric: -0.00032635815453285755
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var229_mix', 'Var7,Var193_mix', 'Var7,Var197_mix', 'Var7,Var204_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix']
----------
Var7,Var220_mix diff_metric: 0.0002654621627695697
----------
----------
Var7,Var222_mix diff_metric: 0.00016322948785552072
----------
----------
Var7,Var228_mix diff_metric: -0.00016000321014630892
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var229_mix', 'Var7,Var193_mix', 'Var7,Var197_mix', 'Var7,Var204_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var228_mix']
----------
Var35,Var44_mix diff_metric: 0.0
----------
----------
Var35,Var65_mix diff_metric: 0.0006997997993860583
----------
----------
Var35,Var72_mix diff_metric: 0.00013227738608190265
----------
----------
Var35,Var78_mix diff_metric: -0.00010969344211675391
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var229_mix', 'Var7,Var193_mix', 'Var7,Var197_mix', 'Var7,Var204_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var228_mix', 'Var35,Var78_mix']
----------
Var35,Var132_mix diff_metric: -0.00020859901814274107
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var229_mix', 'Var7,Var193_mix', 'Var7,Var197_mix', 'Var7,Var204_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var228_mix', 'Var35,Var78_mix', 'Var35,Var132_mix']
----------
Var35,Var143_mix diff_metric: -0.0001665565867434715
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var229_mix', 'Var7,Var193_mix', 'Var7,Var197_mix', 'Var7,Var204_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var228_mix', 'Var35,Var78_mix', 'Var35,Var132_mix', 'Var35,Var143_mix']
----------
Var35,Var144_mix diff_metric: -0.0002744352476485856
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var229_mix', 'Var7,Var193_mix', 'Var7,Var197_mix', 'Var7,Var204_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var228_mix', 'Var35,Var78_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix']
----------
Var35,Var173_mix diff_metric: 0.00024479382119424464
----------
----------
Var35,Var181_mix diff_metric: 8.237090276597137e-05
----------
----------
Var35,Var194_mix diff_metric: 4.294982200525599e-05
----------
----------
Var35,Var195_mix diff_metric: -5.595575402095321e-05
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var229_mix', 'Var7,Var193_mix', 'Var7,Var197_mix', 'Var7,Var204_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var228_mix', 'Var35,Var78_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var195_mix']
----------
Var35,Var196_mix diff_metric: -4.113504079372721e-05
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var229_mix', 'Var7,Var193_mix', 'Var7,Var197_mix', 'Var7,Var204_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var228_mix', 'Var35,Var78_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var195_mix', 'Var35,Var196_mix']
----------
Var35,Var201_mix diff_metric: 0.00018722492831868554
----------
----------
Var35,Var203_mix diff_metric: 1.7946169758142894e-05
----------
----------
Var35,Var205_mix diff_metric: 0.0004605511430046949
----------
----------
Var35,Var206_mix diff_metric: 4.305064318366192e-05
----------
----------
Var35,Var207_mix diff_metric: 1.673631561693867e-05
----------
----------
Var35,Var208_mix diff_metric: -0.00021313597117156302
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var229_mix', 'Var7,Var193_mix', 'Var7,Var197_mix', 'Var7,Var204_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var228_mix', 'Var35,Var78_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var195_mix', 'Var35,Var196_mix', 'Var35,Var208_mix']
----------
Var35,Var210_mix diff_metric: 3.589233951606374e-05
----------
----------
Var35,Var211_mix diff_metric: -0.00015496215122556833
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var229_mix', 'Var7,Var193_mix', 'Var7,Var197_mix', 'Var7,Var204_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var228_mix', 'Var35,Var78_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var195_mix', 'Var35,Var196_mix', 'Var35,Var208_mix', 'Var35,Var211_mix']
----------
Var35,Var218_mix diff_metric: -0.0018411963602346093
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var229_mix', 'Var7,Var193_mix', 'Var7,Var197_mix', 'Var7,Var204_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var228_mix', 'Var35,Var78_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var195_mix', 'Var35,Var196_mix', 'Var35,Var208_mix', 'Var35,Var211_mix', 'Var35,Var218_mix']
----------
Var35,Var219_mix diff_metric: -0.00015879335600532674
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var229_mix', 'Var7,Var193_mix', 'Var7,Var197_mix', 'Var7,Var204_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var228_mix', 'Var35,Var78_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var195_mix', 'Var35,Var196_mix', 'Var35,Var208_mix', 'Var35,Var211_mix', 'Var35,Var218_mix', 'Var35,Var219_mix']
----------
Var35,Var221_mix diff_metric: -3.0246353525109626e-06
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var229_mix', 'Var7,Var193_mix', 'Var7,Var197_mix', 'Var7,Var204_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var228_mix', 'Var35,Var78_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var195_mix', 'Var35,Var196_mix', 'Var35,Var208_mix', 'Var35,Var211_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var221_mix']
----------
Var35,Var223_mix diff_metric: 0.00010364417141173199
----------
----------
Var35,Var225_mix diff_metric: 0.0013112802464795292
----------
----------
Var35,Var226_mix diff_metric: 0.0006816519872711035
----------
----------
Var35,Var227_mix diff_metric: 0.00014175457685305926
----------
----------
Var35,Var229_mix diff_metric: 0.0006835675896611493
----------
----------
Var35,Var192_mix diff_metric: -0.002498852907042659
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var229_mix', 'Var7,Var193_mix', 'Var7,Var197_mix', 'Var7,Var204_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var228_mix', 'Var35,Var78_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var195_mix', 'Var35,Var196_mix', 'Var35,Var208_mix', 'Var35,Var211_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var221_mix', 'Var35,Var192_mix']
----------
Var35,Var193_mix diff_metric: 0.0002778631677147203
----------
----------
Var35,Var197_mix diff_metric: 1.1392793160980297e-05
----------
----------
Var35,Var198_mix diff_metric: 8.741196168671195e-05
----------
----------
Var35,Var199_mix diff_metric: -0.0006399120194068297
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var229_mix', 'Var7,Var193_mix', 'Var7,Var197_mix', 'Var7,Var204_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var228_mix', 'Var35,Var78_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var195_mix', 'Var35,Var196_mix', 'Var35,Var208_mix', 'Var35,Var211_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var221_mix', 'Var35,Var192_mix', 'Var35,Var199_mix']
----------
Var35,Var202_mix diff_metric: -0.00045127559459035016
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var229_mix', 'Var7,Var193_mix', 'Var7,Var197_mix', 'Var7,Var204_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var228_mix', 'Var35,Var78_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var195_mix', 'Var35,Var196_mix', 'Var35,Var208_mix', 'Var35,Var211_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var221_mix', 'Var35,Var192_mix', 'Var35,Var199_mix', 'Var35,Var202_mix']
----------
Var35,Var204_mix diff_metric: -5.2729476311630386e-05
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var229_mix', 'Var7,Var193_mix', 'Var7,Var197_mix', 'Var7,Var204_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var228_mix', 'Var35,Var78_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var195_mix', 'Var35,Var196_mix', 'Var35,Var208_mix', 'Var35,Var211_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var221_mix', 'Var35,Var192_mix', 'Var35,Var199_mix', 'Var35,Var202_mix', 'Var35,Var204_mix']
----------
Var35,Var212_mix diff_metric: -0.0006865922250135492
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var229_mix', 'Var7,Var193_mix', 'Var7,Var197_mix', 'Var7,Var204_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var228_mix', 'Var35,Var78_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var195_mix', 'Var35,Var196_mix', 'Var35,Var208_mix', 'Var35,Var211_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var221_mix', 'Var35,Var192_mix', 'Var35,Var199_mix', 'Var35,Var202_mix', 'Var35,Var204_mix', 'Var35,Var212_mix']
----------
Var35,Var216_mix diff_metric: -0.00036245213640573315
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var229_mix', 'Var7,Var193_mix', 'Var7,Var197_mix', 'Var7,Var204_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var228_mix', 'Var35,Var78_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var195_mix', 'Var35,Var196_mix', 'Var35,Var208_mix', 'Var35,Var211_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var221_mix', 'Var35,Var192_mix', 'Var35,Var199_mix', 'Var35,Var202_mix', 'Var35,Var204_mix', 'Var35,Var212_mix', 'Var35,Var216_mix']
----------
Var35,Var217_mix diff_metric: -0.0006242847367523563
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var229_mix', 'Var7,Var193_mix', 'Var7,Var197_mix', 'Var7,Var204_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var228_mix', 'Var35,Var78_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var195_mix', 'Var35,Var196_mix', 'Var35,Var208_mix', 'Var35,Var211_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var221_mix', 'Var35,Var192_mix', 'Var35,Var199_mix', 'Var35,Var202_mix', 'Var35,Var204_mix', 'Var35,Var212_mix', 'Var35,Var216_mix', 'Var35,Var217_mix']
----------
Var35,Var220_mix diff_metric: 0.0006633025327993369
----------
----------
Var35,Var222_mix diff_metric: -6.119845529850565e-05
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var229_mix', 'Var7,Var193_mix', 'Var7,Var197_mix', 'Var7,Var204_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var228_mix', 'Var35,Var78_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var195_mix', 'Var35,Var196_mix', 'Var35,Var208_mix', 'Var35,Var211_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var221_mix', 'Var35,Var192_mix', 'Var35,Var199_mix', 'Var35,Var202_mix', 'Var35,Var204_mix', 'Var35,Var212_mix', 'Var35,Var216_mix', 'Var35,Var217_mix', 'Var35,Var222_mix']
----------
Var35,Var228_mix diff_metric: -0.00017663870458484165
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var229_mix', 'Var7,Var193_mix', 'Var7,Var197_mix', 'Var7,Var204_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var228_mix', 'Var35,Var78_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var195_mix', 'Var35,Var196_mix', 'Var35,Var208_mix', 'Var35,Var211_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var221_mix', 'Var35,Var192_mix', 'Var35,Var199_mix', 'Var35,Var202_mix', 'Var35,Var204_mix', 'Var35,Var212_mix', 'Var35,Var216_mix', 'Var35,Var217_mix', 'Var35,Var222_mix', 'Var35,Var228_mix']
----------
Var44,Var65_mix diff_metric: 0.000741741409607144
----------
----------
Var44,Var72_mix diff_metric: -0.00016635494438654863
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var229_mix', 'Var7,Var193_mix', 'Var7,Var197_mix', 'Var7,Var204_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var228_mix', 'Var35,Var78_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var195_mix', 'Var35,Var196_mix', 'Var35,Var208_mix', 'Var35,Var211_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var221_mix', 'Var35,Var192_mix', 'Var35,Var199_mix', 'Var35,Var202_mix', 'Var35,Var204_mix', 'Var35,Var212_mix', 'Var35,Var216_mix', 'Var35,Var217_mix', 'Var35,Var222_mix', 'Var35,Var228_mix', 'Var44,Var72_mix']
----------
Var44,Var78_mix diff_metric: 8.771442522204076e-05
----------
----------
Var44,Var132_mix diff_metric: -7.93462674134604e-05
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var229_mix', 'Var7,Var193_mix', 'Var7,Var197_mix', 'Var7,Var204_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var228_mix', 'Var35,Var78_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var195_mix', 'Var35,Var196_mix', 'Var35,Var208_mix', 'Var35,Var211_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var221_mix', 'Var35,Var192_mix', 'Var35,Var199_mix', 'Var35,Var202_mix', 'Var35,Var204_mix', 'Var35,Var212_mix', 'Var35,Var216_mix', 'Var35,Var217_mix', 'Var35,Var222_mix', 'Var35,Var228_mix', 'Var44,Var72_mix', 'Var44,Var132_mix']
----------
Var44,Var143_mix diff_metric: 5.807299876769978e-05
----------
----------
Var44,Var144_mix diff_metric: -4.879745035335503e-05
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var229_mix', 'Var7,Var193_mix', 'Var7,Var197_mix', 'Var7,Var204_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var228_mix', 'Var35,Var78_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var195_mix', 'Var35,Var196_mix', 'Var35,Var208_mix', 'Var35,Var211_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var221_mix', 'Var35,Var192_mix', 'Var35,Var199_mix', 'Var35,Var202_mix', 'Var35,Var204_mix', 'Var35,Var212_mix', 'Var35,Var216_mix', 'Var35,Var217_mix', 'Var35,Var222_mix', 'Var35,Var228_mix', 'Var44,Var72_mix', 'Var44,Var132_mix', 'Var44,Var144_mix']
----------
Var44,Var173_mix diff_metric: -6.412226947261068e-05
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var229_mix', 'Var7,Var193_mix', 'Var7,Var197_mix', 'Var7,Var204_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var228_mix', 'Var35,Var78_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var195_mix', 'Var35,Var196_mix', 'Var35,Var208_mix', 'Var35,Var211_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var221_mix', 'Var35,Var192_mix', 'Var35,Var199_mix', 'Var35,Var202_mix', 'Var35,Var204_mix', 'Var35,Var212_mix', 'Var35,Var216_mix', 'Var35,Var217_mix', 'Var35,Var222_mix', 'Var35,Var228_mix', 'Var44,Var72_mix', 'Var44,Var132_mix', 'Var44,Var144_mix', 'Var44,Var173_mix']
----------
Var44,Var181_mix diff_metric: 0.00021595896416737315
----------
----------
Var44,Var194_mix diff_metric: -5.444343634475324e-06
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var229_mix', 'Var7,Var193_mix', 'Var7,Var197_mix', 'Var7,Var204_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var228_mix', 'Var35,Var78_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var195_mix', 'Var35,Var196_mix', 'Var35,Var208_mix', 'Var35,Var211_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var221_mix', 'Var35,Var192_mix', 'Var35,Var199_mix', 'Var35,Var202_mix', 'Var35,Var204_mix', 'Var35,Var212_mix', 'Var35,Var216_mix', 'Var35,Var217_mix', 'Var35,Var222_mix', 'Var35,Var228_mix', 'Var44,Var72_mix', 'Var44,Var132_mix', 'Var44,Var144_mix', 'Var44,Var173_mix', 'Var44,Var194_mix']
----------
Var44,Var195_mix diff_metric: 4.859580799654317e-05
----------
----------
Var44,Var196_mix diff_metric: 6.533212361359286e-05
----------
----------
Var44,Var201_mix diff_metric: -0.0001609106007520733
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var229_mix', 'Var7,Var193_mix', 'Var7,Var197_mix', 'Var7,Var204_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var228_mix', 'Var35,Var78_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var195_mix', 'Var35,Var196_mix', 'Var35,Var208_mix', 'Var35,Var211_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var221_mix', 'Var35,Var192_mix', 'Var35,Var199_mix', 'Var35,Var202_mix', 'Var35,Var204_mix', 'Var35,Var212_mix', 'Var35,Var216_mix', 'Var35,Var217_mix', 'Var35,Var222_mix', 'Var35,Var228_mix', 'Var44,Var72_mix', 'Var44,Var132_mix', 'Var44,Var144_mix', 'Var44,Var173_mix', 'Var44,Var194_mix', 'Var44,Var201_mix']
----------
Var44,Var203_mix diff_metric: 5.2628655133224456e-05
----------
----------
Var44,Var205_mix diff_metric: 0.0007171410420735436
----------
----------
Var44,Var206_mix diff_metric: 0.00037818024023872354
----------
----------
Var44,Var207_mix diff_metric: -7.662409559627825e-06
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var229_mix', 'Var7,Var193_mix', 'Var7,Var197_mix', 'Var7,Var204_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var228_mix', 'Var35,Var78_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var195_mix', 'Var35,Var196_mix', 'Var35,Var208_mix', 'Var35,Var211_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var221_mix', 'Var35,Var192_mix', 'Var35,Var199_mix', 'Var35,Var202_mix', 'Var35,Var204_mix', 'Var35,Var212_mix', 'Var35,Var216_mix', 'Var35,Var217_mix', 'Var35,Var222_mix', 'Var35,Var228_mix', 'Var44,Var72_mix', 'Var44,Var132_mix', 'Var44,Var144_mix', 'Var44,Var173_mix', 'Var44,Var194_mix', 'Var44,Var201_mix', 'Var44,Var207_mix']
----------
Var44,Var208_mix diff_metric: -3.296852534195871e-05
----------
----------
good_cat_columns: ['Var35', 'Var44', 'Var132', 'Var195', 'Var205', 'Var207', 'Var210', 'Var227', 'Var192', 'Var193', 'Var197', 'Var199', 'Var202', 'Var216', 'Var217', 'Var222', 'Var228', 'Var7,Var35_mix', 'Var7,Var44_mix', 'Var7,Var132_mix', 'Var7,Var144_mix', 'Var7,Var181_mix', 'Var7,Var196_mix', 'Var7,Var201_mix', 'Var7,Var205_mix', 'Var7,Var206_mix', 'Var7,Var208_mix', 'Var7,Var218_mix', 'Var7,Var219_mix', 'Var7,Var221_mix', 'Var7,Var229_mix', 'Var7,Var193_mix', 'Var7,Var197_mix', 'Var7,Var204_mix', 'Var7,Var212_mix', 'Var7,Var216_mix', 'Var7,Var217_mix', 'Var7,Var228_mix', 'Var35,Var78_mix', 'Var35,Var132_mix', 'Var35,Var143_mix', 'Var35,Var144_mix', 'Var35,Var195_mix', 'Var35,Var196_mix', 'Var35,Var208_mix', 'Var35,Var211_mix', 'Var35,Var218_mix', 'Var35,Var219_mix', 'Var35,Var221_mix', 'Var35,Var192_mix', 'Var35,Var199_mix', 'Var35,Var202_mix', 'Var35,Var204_mix', 'Var35,Var212_mix', 'Var35,Var216_mix', 'Var35,Var217_mix', 'Var35,Var222_mix', 'Var35,Var228_mix', 'Var44,Var72_mix', 'Var44,Var132_mix', 'Var44,Var144_mix', 'Var44,Var173_mix', 'Var44,Var194_mix', 'Var44,Var201_mix', 'Var44,Var207_mix', 'Var44,Var208_mix']

In [29]:
# Feature engineering for the final model: the same chain of helper calls is
# applied to the full training frame (`features`) and to `test_data`.
# NOTE(review): the helpers are defined elsewhere in the notebook; judging by
# their names they add numeric and categorical interaction features - confirm.

# Step 1: numeric interactions on the preselected columns.
data_plus_new_num = normed_fe_interaction(
    features[good_columns],
    level=2,
    max_feats=100,
    num_columns=num_columns,
)
data_plus_new_num_test = normed_fe_interaction(
    test_data[good_columns],
    level=2,
    max_feats=100,
    num_columns=num_columns,
)

# Step 2: categorical interactions. The training call also yields the updated
# list of categorical column names; for the test frame only the first element
# of the returned pair is kept.
# NOTE(review): the test frame is processed independently of the training
# frame - verify that both end up with the same set of generated columns.
data_plus_num_cat, cat_columns_new = categ_fe_interaction(
    data_plus_new_num,
    max_feats=100,
    cat_columns=cat_columns,
)
data_plus_num_cat_test = categ_fe_interaction(
    data_plus_new_num_test,
    max_feats=100,
    cat_columns=cat_columns,
)[0]

# Step 3: final preparation of the categorical columns for both frames.
data = cat_prep(data_plus_num_cat, cat_columns_new)
data_test = cat_prep(data_plus_num_cat_test, cat_columns_new)
In [30]:
# Final CatBoost model: fit on the whole training set restricted to the
# selected columns, then score the test set and write the submission file.
# NOTE(review): `good_columns2` and `cat_cols_selected` are not defined in this
# cell - they must already exist in the kernel from a feature-selection step
# that was run beforehand.
estimator_cb = CatBoostClassifier(
    task_type='GPU',
    iterations=380,
    random_state=0,
    eval_metric='AUC',
    learning_rate=0.08,
    boosting_type='Ordered',
    bootstrap_type='Bernoulli',
    subsample=0.8,
    one_hot_max_size=10,
    leaf_estimation_iterations=10,
    max_ctr_complexity=4,
)

estimator_cb.fit(
    data[good_columns2],
    labels,
    cat_features=cat_cols_selected,
    verbose=10,  # print the metric every 10 iterations
    plot=True,
)

# Only the second column of the predict_proba output is submitted
# (presumably the probability of the positive / churn class - confirm).
probs = estimator_cb.predict_proba(data_test[good_columns2])
write_to_submission_file(
    probs[:, 1],
    out_file='submission_cb_new_fe_final3_selected_fe4.csv',
)
0:	learn: 0.5670785	total: 179ms	remaining: 1m 7s
10:	learn: 0.5868650	total: 1.95s	remaining: 1m 5s
20:	learn: 0.5870812	total: 3.72s	remaining: 1m 3s
30:	learn: 0.6350247	total: 5.43s	remaining: 1m 1s
40:	learn: 0.6934760	total: 7.45s	remaining: 1m 1s
50:	learn: 0.7106806	total: 9.41s	remaining: 1m
60:	learn: 0.7197737	total: 11.5s	remaining: 59.9s
70:	learn: 0.7272424	total: 13.4s	remaining: 58.3s
80:	learn: 0.7317083	total: 15.4s	remaining: 56.7s
90:	learn: 0.7358798	total: 17.2s	remaining: 54.8s
100:	learn: 0.7395372	total: 19.2s	remaining: 53s
110:	learn: 0.7414187	total: 21.2s	remaining: 51.4s
120:	learn: 0.7441493	total: 23.2s	remaining: 49.7s
130:	learn: 0.7467027	total: 25.2s	remaining: 47.9s
140:	learn: 0.7474188	total: 27.1s	remaining: 46s
150:	learn: 0.7504874	total: 29.1s	remaining: 44.1s
160:	learn: 0.7524772	total: 31s	remaining: 42.2s
170:	learn: 0.7537569	total: 33s	remaining: 40.3s
180:	learn: 0.7544946	total: 34.9s	remaining: 38.3s
190:	learn: 0.7556350	total: 36.7s	remaining: 36.3s
200:	learn: 0.7565301	total: 38.6s	remaining: 34.4s
210:	learn: 0.7586674	total: 40.6s	remaining: 32.5s
220:	learn: 0.7598111	total: 42.4s	remaining: 30.5s
230:	learn: 0.7608851	total: 44.3s	remaining: 28.6s
240:	learn: 0.7612926	total: 46.1s	remaining: 26.6s
250:	learn: 0.7623470	total: 48s	remaining: 24.7s
260:	learn: 0.7630500	total: 49.9s	remaining: 22.7s
270:	learn: 0.7640928	total: 51.7s	remaining: 20.8s
280:	learn: 0.7654472	total: 53.7s	remaining: 18.9s
290:	learn: 0.7657294	total: 55.6s	remaining: 17s
300:	learn: 0.7666416	total: 57.5s	remaining: 15.1s
310:	learn: 0.7676733	total: 59.4s	remaining: 13.2s
320:	learn: 0.7683516	total: 1m 1s	remaining: 11.2s
330:	learn: 0.7690049	total: 1m 3s	remaining: 9.34s
340:	learn: 0.7700515	total: 1m 5s	remaining: 7.44s
350:	learn: 0.7705938	total: 1m 6s	remaining: 5.53s
360:	learn: 0.7712952	total: 1m 8s	remaining: 3.63s
370:	learn: 0.7720325	total: 1m 10s	remaining: 1.72s
379:	learn: 0.7724579	total: 1m 12s	remaining: 0us

image.png

In [ ]:
 
In [31]:
# Indices of the significant features after preprocessing.
# params = [prop_nan, max_prop_unique, N_lim]
num_inds, cat_indices1, cat_indices2 = data_preprocessor(
    feats_train, params=[0.99, 0.99, 30]
)

# Translate the positional indices into column names. The names are read from
# `features.columns`, while the indices were computed on `feats_train`.
# NOTE(review): assumes both objects share the same column order -- confirm.
_all_feature_names = np.array(features.columns)
num_columns = list(_all_feature_names[num_inds])
cat_columns = (
    list(_all_feature_names[cat_indices1]) + list(_all_feature_names[cat_indices2])
)

# Numerical columns first, then both categorical groups.
good_columns = num_columns + cat_columns
In [37]:
# Build the model inputs in two steps for each split: first pass the selected
# columns through `normed_fe_interaction`, then run `cat_prep` on the result
# with the categorical column list.
# NOTE(review): the interaction helper is applied to the training and the
# validation split independently with identical settings; presumably it yields
# the same derived columns for both -- verify against its definition.
_interaction_options = {
    "level": 2,
    "max_feats": 150,
    "num_columns": num_columns,
}

data_plus_new_num_train = normed_fe_interaction(
    feats_train[good_columns], **_interaction_options
)
data_plus_new_num_val = normed_fe_interaction(
    feats_val[good_columns], **_interaction_options
)

data_train = cat_prep(data_plus_new_num_train, cat_columns)
data_val = cat_prep(data_plus_new_num_val, cat_columns)
In [38]:
# CatBoost configuration for this experiment (GPU training, AUC as the
# evaluation metric, ordered boosting with Bernoulli row subsampling).
_catboost_settings = {
    "task_type": "GPU",
    "iterations": 380,
    "random_state": 0,
    "eval_metric": "AUC",
    "learning_rate": 0.08,
    "boosting_type": "Ordered",
    "bootstrap_type": "Bernoulli",
    "subsample": 0.8,
    "one_hot_max_size": 10,
    "leaf_estimation_iterations": 10,
    "max_ctr_complexity": 4,
}
estimator_cb = CatBoostClassifier(**_catboost_settings)

# Fit on the training split while monitoring the validation split;
# verbose=10 prints a progress line every 10 iterations.
estimator_cb.fit(
    data_train,
    labels_train,
    cat_features=cat_columns,
    verbose=10,
    plot=True,
    eval_set=(data_val, labels_val),
)

# Run the feature-selection loop with the fitted model on the validation split.
# NOTE(review): the same validation data is used both as eval_set above and
# for the selection here, so the selected set may be optimistic -- confirm
# this is intended.
num_cols_selected, cat_cols_selected, importances = feature_selection_loop_prediction(
    estimator_cb, data_val, labels_val, cat_columns, threshold=0
)
good_columns2 = num_cols_selected + cat_cols_selected
0:	learn: 0.5605050	test: 0.5497987	best: 0.5497987 (0)	total: 159ms	remaining: 1m
10:	learn: 0.5569304	test: 0.5496511	best: 0.5504943 (2)	total: 1.53s	remaining: 51.5s
20:	learn: 0.6060851	test: 0.5984988	best: 0.5984988 (20)	total: 3.21s	remaining: 54.8s
30:	learn: 0.6806747	test: 0.6734573	best: 0.6734573 (30)	total: 5s	remaining: 56.3s
40:	learn: 0.7110625	test: 0.7010159	best: 0.7010159 (40)	total: 6.88s	remaining: 56.9s
50:	learn: 0.7267386	test: 0.7124145	best: 0.7124145 (50)	total: 8.79s	remaining: 56.7s
60:	learn: 0.7343113	test: 0.7185856	best: 0.7185856 (60)	total: 10.7s	remaining: 55.7s
70:	learn: 0.7391164	test: 0.7219484	best: 0.7219484 (70)	total: 12.6s	remaining: 54.7s
80:	learn: 0.7463315	test: 0.7278416	best: 0.7278416 (80)	total: 14.5s	remaining: 53.4s
90:	learn: 0.7493131	test: 0.7290943	best: 0.7290943 (90)	total: 16.3s	remaining: 51.8s
100:	learn: 0.7525668	test: 0.7311711	best: 0.7311711 (100)	total: 18.3s	remaining: 50.4s
110:	learn: 0.7544025	test: 0.7316055	best: 0.7318376 (109)	total: 20.1s	remaining: 48.7s
120:	learn: 0.7575296	test: 0.7333116	best: 0.7333116 (120)	total: 22.1s	remaining: 47.3s
130:	learn: 0.7598963	test: 0.7348425	best: 0.7348426 (129)	total: 23.9s	remaining: 45.5s
140:	learn: 0.7617585	test: 0.7363669	best: 0.7363669 (140)	total: 25.7s	remaining: 43.6s
150:	learn: 0.7627017	test: 0.7359426	best: 0.7363669 (140)	total: 27.5s	remaining: 41.7s
160:	learn: 0.7640391	test: 0.7363476	best: 0.7363669 (140)	total: 29.2s	remaining: 39.8s
170:	learn: 0.7651596	test: 0.7365328	best: 0.7365328 (170)	total: 31s	remaining: 37.9s
180:	learn: 0.7661651	test: 0.7361370	best: 0.7365328 (170)	total: 32.7s	remaining: 36s
190:	learn: 0.7669297	test: 0.7360513	best: 0.7365328 (170)	total: 34.4s	remaining: 34.1s
200:	learn: 0.7680340	test: 0.7361336	best: 0.7365328 (170)	total: 36.2s	remaining: 32.2s
210:	learn: 0.7693269	test: 0.7363924	best: 0.7365328 (170)	total: 37.9s	remaining: 30.4s
220:	learn: 0.7700009	test: 0.7358974	best: 0.7365328 (170)	total: 39.6s	remaining: 28.5s
230:	learn: 0.7707302	test: 0.7359266	best: 0.7365328 (170)	total: 41.4s	remaining: 26.7s
240:	learn: 0.7724907	test: 0.7363073	best: 0.7365328 (170)	total: 43.2s	remaining: 24.9s
250:	learn: 0.7732965	test: 0.7362061	best: 0.7365328 (170)	total: 45s	remaining: 23.1s
260:	learn: 0.7743137	test: 0.7361083	best: 0.7365328 (170)	total: 46.8s	remaining: 21.3s
270:	learn: 0.7754170	test: 0.7361125	best: 0.7365328 (170)	total: 48.5s	remaining: 19.5s
280:	learn: 0.7764743	test: 0.7366106	best: 0.7366106 (279)	total: 50.2s	remaining: 17.7s
290:	learn: 0.7773696	test: 0.7363237	best: 0.7367069 (281)	total: 51.9s	remaining: 15.9s
300:	learn: 0.7782720	test: 0.7359534	best: 0.7367069 (281)	total: 53.6s	remaining: 14.1s
310:	learn: 0.7793744	test: 0.7361395	best: 0.7367069 (281)	total: 55.4s	remaining: 12.3s
320:	learn: 0.7799949	test: 0.7359454	best: 0.7367069 (281)	total: 57.2s	remaining: 10.5s
330:	learn: 0.7806435	test: 0.7360084	best: 0.7367069 (281)	total: 58.9s	remaining: 8.72s
340:	learn: 0.7816439	test: 0.7358506	best: 0.7367069 (281)	total: 1m	remaining: 6.93s
350:	learn: 0.7826389	test: 0.7366799	best: 0.7367069 (281)	total: 1m 2s	remaining: 5.15s
360:	learn: 0.7838408	test: 0.7361139	best: 0.7367069 (281)	total: 1m 4s	remaining: 3.38s
370:	learn: 0.7850620	test: 0.7361234	best: 0.7367069 (281)	total: 1m 5s	remaining: 1.6s
379:	learn: 0.7859541	test: 0.7360192	best: 0.7367069 (281)	total: 1m 7s	remaining: 0us
bestTest = 0.7367068529
bestIteration = 281
Shrink model to first 282 iterations.
----------
Var3 diff_metric: 0.0
----------
----------
Var5 diff_metric: 0.0
----------
----------
Var6 diff_metric: 5.2124549241083784e-05
----------
----------
Var9 diff_metric: 0.0
----------
----------
Var10 diff_metric: 0.0
----------
----------
Var13 diff_metric: 6.059352822818109e-05
----------
----------
Var16 diff_metric: 0.0
----------
----------
Var17 diff_metric: 0.0
----------
----------
Var18 diff_metric: 0.0
----------
----------
Var21 diff_metric: 0.0
----------
----------
Var22 diff_metric: -0.0013586662003350902
----------
----------
good_num_columns: ['Var22']
----------
Var24 diff_metric: 0.0
----------
----------
Var25 diff_metric: 0.0002490283106877378
----------
----------
Var28 diff_metric: 0.00047779156451377425
----------
----------
Var33 diff_metric: 0.0
----------
----------
Var36 diff_metric: 1.0082117841703209e-06
----------
----------
Var37 diff_metric: 0.0
----------
----------
Var38 diff_metric: -0.00018883806717329144
----------
----------
good_num_columns: ['Var22', 'Var38']
----------
Var40 diff_metric: 0.0
----------
----------
Var41 diff_metric: 0.0
----------
----------
Var46 diff_metric: 0.0
----------
----------
Var50 diff_metric: 0.0
----------
----------
Var51 diff_metric: 0.0
----------
----------
Var53 diff_metric: 0.0
----------
----------
Var56 diff_metric: 0.0
----------
----------
Var57 diff_metric: -0.0006106738776661125
----------
----------
good_num_columns: ['Var22', 'Var38', 'Var57']
----------
Var58 diff_metric: 0.0
----------
----------
Var59 diff_metric: 0.0
----------
----------
Var60 diff_metric: 0.0
----------
----------
Var61 diff_metric: 0.0
----------
----------
Var63 diff_metric: 0.0
----------
----------
Var66 diff_metric: 0.0
----------
----------
Var68 diff_metric: 0.0
----------
----------
Var69 diff_metric: 0.0
----------
----------
Var70 diff_metric: 0.0
----------
----------
Var71 diff_metric: 0.0
----------
----------
Var73 diff_metric: -0.005921933556625292
----------
----------
good_num_columns: ['Var22', 'Var38', 'Var57', 'Var73']
----------
Var74 diff_metric: -0.005956716863178668
----------
----------
good_num_columns: ['Var22', 'Var38', 'Var57', 'Var73', 'Var74']
----------
Var76 diff_metric: 1.6131388547169223e-06
----------
----------
Var80 diff_metric: 0.0
----------
----------
Var81 diff_metric: 0.0005731683992955539
----------
----------
Var83 diff_metric: -1.764370622270306e-05
----------
----------
good_num_columns: ['Var22', 'Var38', 'Var57', 'Var73', 'Var74', 'Var83']
----------
Var84 diff_metric: 0.0
----------
----------
Var85 diff_metric: -0.0001192714540660944
----------
----------
good_num_columns: ['Var22', 'Var38', 'Var57', 'Var73', 'Var74', 'Var83', 'Var85']
----------
Var86 diff_metric: 0.0
----------
----------
Var88 diff_metric: 0.0
----------
----------
Var91 diff_metric: 0.0
----------
----------
Var94 diff_metric: 0.0
----------
----------
Var95 diff_metric: 0.0
----------
----------
Var96 diff_metric: 0.0
----------
----------
Var98 diff_metric: 0.0
----------
----------
Var99 diff_metric: 0.0
----------
----------
Var101 diff_metric: 0.0
----------
----------
Var103 diff_metric: 0.0
----------
----------
Var104 diff_metric: 0.0
----------
----------
Var105 diff_metric: 0.0
----------
----------
Var106 diff_metric: 0.0
----------
----------
Var108 diff_metric: 4.637774207338907e-06
----------
----------
Var109 diff_metric: 0.0002022472839027234
----------
----------
Var111 diff_metric: 0.0
----------
----------
Var112 diff_metric: 0.00020426370747095302
----------
----------
Var113 diff_metric: -0.003100049593937504
----------
----------
good_num_columns: ['Var22', 'Var38', 'Var57', 'Var73', 'Var74', 'Var83', 'Var85', 'Var113']
----------
Var114 diff_metric: -4.375639143261445e-05
----------
----------
good_num_columns: ['Var22', 'Var38', 'Var57', 'Var73', 'Var74', 'Var83', 'Var85', 'Var113', 'Var114']
----------
Var115 diff_metric: 0.0
----------
----------
Var117 diff_metric: 4.5772815001066114e-05
----------
----------
Var119 diff_metric: 0.0
----------
----------
Var120 diff_metric: 0.0
----------
----------
Var121 diff_metric: 0.0
----------
----------
Var123 diff_metric: 0.0
----------
----------
Var124 diff_metric: 4.0328471373474173e-07
----------
----------
Var125 diff_metric: 0.0
----------
----------
Var126 diff_metric: -0.11368454928547511
----------
----------
good_num_columns: ['Var22', 'Var38', 'Var57', 'Var73', 'Var74', 'Var83', 'Var85', 'Var113', 'Var114', 'Var126']
----------
Var127 diff_metric: 0.0
----------
----------
Var128 diff_metric: 0.0
----------
----------
Var129 diff_metric: 0.0
----------
----------
Var131 diff_metric: 0.0
----------
----------
Var133 diff_metric: 0.00011402875278865299
----------
----------
Var134 diff_metric: 8.842017347110431e-05
----------
----------
Var135 diff_metric: 0.0
----------
----------
Var136 diff_metric: 0.0
----------
----------
Var139 diff_metric: 0.0
----------
----------
Var140 diff_metric: -0.00021616060652407398
----------
----------
good_num_columns: ['Var22', 'Var38', 'Var57', 'Var73', 'Var74', 'Var83', 'Var85', 'Var113', 'Var114', 'Var126', 'Var140']
----------
Var145 diff_metric: 0.0
----------
----------
Var148 diff_metric: 0.0
----------
----------
Var149 diff_metric: -0.00016343113021233258
----------
----------
good_num_columns: ['Var22', 'Var38', 'Var57', 'Var73', 'Var74', 'Var83', 'Var85', 'Var113', 'Var114', 'Var126', 'Var140', 'Var149']
----------
Var150 diff_metric: 0.0
----------
----------
Var153 diff_metric: -0.0001991218273718065
----------
----------
good_num_columns: ['Var22', 'Var38', 'Var57', 'Var73', 'Var74', 'Var83', 'Var85', 'Var113', 'Var114', 'Var126', 'Var140', 'Var149', 'Var153']
----------
Var154 diff_metric: 0.0
----------
----------
Var156 diff_metric: 0.0
----------
----------
Var157 diff_metric: 0.0
----------
----------
Var160 diff_metric: 0.0
----------
----------
Var162 diff_metric: 0.0
----------
----------
Var163 diff_metric: 2.812910877814101e-05
----------
----------
Var165 diff_metric: 0.0
----------
----------
Var166 diff_metric: 0.00032403926742941014
----------
----------
Var168 diff_metric: 0.0
----------
----------
Var171 diff_metric: 4.637774207150169e-05
----------
----------
Var174 diff_metric: 0.0
----------
----------
Var176 diff_metric: 0.0
----------
----------
Var177 diff_metric: 0.0
----------
----------
Var178 diff_metric: 0.0
----------
----------
Var180 diff_metric: 0.0
----------
----------
Var182 diff_metric: 0.0
----------
----------
Var183 diff_metric: 0.0
----------
----------
Var184 diff_metric: 0.0
----------
----------
Var187 diff_metric: 0.0
----------
----------
Var188 diff_metric: 0.0
----------
----------
Var189 diff_metric: -0.005839764296216132
----------
----------
good_num_columns: ['Var22', 'Var38', 'Var57', 'Var73', 'Var74', 'Var83', 'Var85', 'Var113', 'Var114', 'Var126', 'Var140', 'Var149', 'Var153', 'Var189']
----------
Var1 diff_metric: -5.141880099146512e-06
----------
----------
good_cat_columns: ['Var1']
----------
Var2 diff_metric: 0.0
----------
----------
Var4 diff_metric: 0.0
----------
----------
Var7 diff_metric: -0.0011560156317187431
----------
----------
good_cat_columns: ['Var1', 'Var7']
----------
Var11 diff_metric: 0.0
----------
----------
Var12 diff_metric: -3.095210177361807e-05
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12']
----------
Var14 diff_metric: -9.073906057421866e-06
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14']
----------
Var19 diff_metric: 0.0
----------
----------
Var23 diff_metric: -1.683713679545562e-05
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14', 'Var23']
----------
Var26 diff_metric: 0.0
----------
----------
Var27 diff_metric: 0.0
----------
----------
Var29 diff_metric: 0.0
----------
----------
Var30 diff_metric: 5.14188009923533e-05
----------
----------
Var34 diff_metric: 0.0
----------
----------
Var35 diff_metric: -0.0010593281216176864
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14', 'Var23', 'Var35']
----------
Var43 diff_metric: -0.0004987623696242061
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14', 'Var23', 'Var35', 'Var43']
----------
Var44 diff_metric: -0.00019670211908973112
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14', 'Var23', 'Var35', 'Var43', 'Var44']
----------
Var47 diff_metric: -0.00016232209724986735
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14', 'Var23', 'Var35', 'Var43', 'Var44', 'Var47']
----------
Var49 diff_metric: 0.0
----------
----------
Var54 diff_metric: 0.0
----------
----------
Var62 diff_metric: 0.0
----------
----------
Var65 diff_metric: -0.0006368873840543188
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14', 'Var23', 'Var35', 'Var43', 'Var44', 'Var47', 'Var65']
----------
Var67 diff_metric: 0.0
----------
----------
Var72 diff_metric: 0.0
----------
----------
Var75 diff_metric: -4.486542439507968e-05
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14', 'Var23', 'Var35', 'Var43', 'Var44', 'Var47', 'Var65', 'Var75']
----------
Var77 diff_metric: -6.341652122365815e-05
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14', 'Var23', 'Var35', 'Var43', 'Var44', 'Var47', 'Var65', 'Var75', 'Var77']
----------
Var78 diff_metric: 7.722902266682485e-05
----------
----------
Var82 diff_metric: 0.0
----------
----------
Var87 diff_metric: 0.0
----------
----------
Var89 diff_metric: -9.134398764487628e-05
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14', 'Var23', 'Var35', 'Var43', 'Var44', 'Var47', 'Var65', 'Var75', 'Var77', 'Var89']
----------
Var90 diff_metric: 0.0
----------
----------
Var93 diff_metric: 0.0
----------
----------
Var97 diff_metric: 0.0
----------
----------
Var100 diff_metric: 0.0
----------
----------
Var107 diff_metric: -0.0003070004882768096
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14', 'Var23', 'Var35', 'Var43', 'Var44', 'Var47', 'Var65', 'Var75', 'Var77', 'Var89', 'Var107']
----------
Var110 diff_metric: 0.0
----------
----------
Var116 diff_metric: 0.0
----------
----------
Var122 diff_metric: 0.0
----------
----------
Var130 diff_metric: 0.0
----------
----------
Var132 diff_metric: -0.0002124302229227215
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14', 'Var23', 'Var35', 'Var43', 'Var44', 'Var47', 'Var65', 'Var75', 'Var77', 'Var89', 'Var107', 'Var132']
----------
Var137 diff_metric: -5.847628347988021e-06
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14', 'Var23', 'Var35', 'Var43', 'Var44', 'Var47', 'Var65', 'Var75', 'Var77', 'Var89', 'Var107', 'Var132', 'Var137']
----------
Var138 diff_metric: 0.0
----------
----------
Var142 diff_metric: 0.0
----------
----------
Var143 diff_metric: 0.0
----------
----------
Var144 diff_metric: 0.0
----------
----------
Var146 diff_metric: -0.00012703468480412816
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14', 'Var23', 'Var35', 'Var43', 'Var44', 'Var47', 'Var65', 'Var75', 'Var77', 'Var89', 'Var107', 'Var132', 'Var137', 'Var146']
----------
Var147 diff_metric: 0.0
----------
----------
Var151 diff_metric: 8.721031933001111e-05
----------
----------
Var152 diff_metric: 4.9200735067200796e-05
----------
----------
Var155 diff_metric: 0.0
----------
----------
Var158 diff_metric: -1.1997720231526898e-05
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14', 'Var23', 'Var35', 'Var43', 'Var44', 'Var47', 'Var65', 'Var75', 'Var77', 'Var89', 'Var107', 'Var132', 'Var137', 'Var146', 'Var158']
----------
Var159 diff_metric: 0.0
----------
----------
Var161 diff_metric: 0.0
----------
----------
Var164 diff_metric: -0.00010918933622461324
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14', 'Var23', 'Var35', 'Var43', 'Var44', 'Var47', 'Var65', 'Var75', 'Var77', 'Var89', 'Var107', 'Var132', 'Var137', 'Var146', 'Var158', 'Var164']
----------
Var170 diff_metric: -8.831935229236532e-05
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14', 'Var23', 'Var35', 'Var43', 'Var44', 'Var47', 'Var65', 'Var75', 'Var77', 'Var89', 'Var107', 'Var132', 'Var137', 'Var146', 'Var158', 'Var164', 'Var170']
----------
Var172 diff_metric: 0.0002952044104023166
----------
----------
Var173 diff_metric: 0.0
----------
----------
Var179 diff_metric: -9.073906057310843e-06
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14', 'Var23', 'Var35', 'Var43', 'Var44', 'Var47', 'Var65', 'Var75', 'Var77', 'Var89', 'Var107', 'Var132', 'Var137', 'Var146', 'Var158', 'Var164', 'Var170', 'Var179']
----------
Var181 diff_metric: 0.0
----------
----------
Var186 diff_metric: 0.0
----------
----------
Var194 diff_metric: 0.0
----------
----------
Var195 diff_metric: 5.958531644401077e-05
----------
----------
Var196 diff_metric: 0.0
----------
----------
Var201 diff_metric: 3.4279200662457043e-06
----------
----------
Var203 diff_metric: 0.0
----------
----------
Var205 diff_metric: -0.0006496916737132041
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14', 'Var23', 'Var35', 'Var43', 'Var44', 'Var47', 'Var65', 'Var75', 'Var77', 'Var89', 'Var107', 'Var132', 'Var137', 'Var146', 'Var158', 'Var164', 'Var170', 'Var179', 'Var205']
----------
Var206 diff_metric: -0.0021549518674652823
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14', 'Var23', 'Var35', 'Var43', 'Var44', 'Var47', 'Var65', 'Var75', 'Var77', 'Var89', 'Var107', 'Var132', 'Var137', 'Var146', 'Var158', 'Var164', 'Var170', 'Var179', 'Var205', 'Var206']
----------
Var207 diff_metric: -0.0006022048986792372
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14', 'Var23', 'Var35', 'Var43', 'Var44', 'Var47', 'Var65', 'Var75', 'Var77', 'Var89', 'Var107', 'Var132', 'Var137', 'Var146', 'Var158', 'Var164', 'Var170', 'Var179', 'Var205', 'Var206', 'Var207']
----------
Var208 diff_metric: 0.0
----------
----------
Var210 diff_metric: -0.0019773049510960483
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14', 'Var23', 'Var35', 'Var43', 'Var44', 'Var47', 'Var65', 'Var75', 'Var77', 'Var89', 'Var107', 'Var132', 'Var137', 'Var146', 'Var158', 'Var164', 'Var170', 'Var179', 'Var205', 'Var206', 'Var207', 'Var210']
----------
Var211 diff_metric: -0.0002551784025710546
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14', 'Var23', 'Var35', 'Var43', 'Var44', 'Var47', 'Var65', 'Var75', 'Var77', 'Var89', 'Var107', 'Var132', 'Var137', 'Var146', 'Var158', 'Var164', 'Var170', 'Var179', 'Var205', 'Var206', 'Var207', 'Var210', 'Var211']
----------
Var218 diff_metric: -0.0035760263772398604
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14', 'Var23', 'Var35', 'Var43', 'Var44', 'Var47', 'Var65', 'Var75', 'Var77', 'Var89', 'Var107', 'Var132', 'Var137', 'Var146', 'Var158', 'Var164', 'Var170', 'Var179', 'Var205', 'Var206', 'Var207', 'Var210', 'Var211', 'Var218']
----------
Var219 diff_metric: -0.000447041105096746
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14', 'Var23', 'Var35', 'Var43', 'Var44', 'Var47', 'Var65', 'Var75', 'Var77', 'Var89', 'Var107', 'Var132', 'Var137', 'Var146', 'Var158', 'Var164', 'Var170', 'Var179', 'Var205', 'Var206', 'Var207', 'Var210', 'Var211', 'Var218', 'Var219']
----------
Var221 diff_metric: 0.0
----------
----------
Var223 diff_metric: 0.0
----------
----------
Var225 diff_metric: -8.97308487903814e-05
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14', 'Var23', 'Var35', 'Var43', 'Var44', 'Var47', 'Var65', 'Var75', 'Var77', 'Var89', 'Var107', 'Var132', 'Var137', 'Var146', 'Var158', 'Var164', 'Var170', 'Var179', 'Var205', 'Var206', 'Var207', 'Var210', 'Var211', 'Var218', 'Var219', 'Var225']
----------
Var226 diff_metric: -0.0019445380681109015
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14', 'Var23', 'Var35', 'Var43', 'Var44', 'Var47', 'Var65', 'Var75', 'Var77', 'Var89', 'Var107', 'Var132', 'Var137', 'Var146', 'Var158', 'Var164', 'Var170', 'Var179', 'Var205', 'Var206', 'Var207', 'Var210', 'Var211', 'Var218', 'Var219', 'Var225', 'Var226']
----------
Var227 diff_metric: 0.0
----------
----------
Var229 diff_metric: 0.0
----------
----------
Var192 diff_metric: -0.004076401885718894
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14', 'Var23', 'Var35', 'Var43', 'Var44', 'Var47', 'Var65', 'Var75', 'Var77', 'Var89', 'Var107', 'Var132', 'Var137', 'Var146', 'Var158', 'Var164', 'Var170', 'Var179', 'Var205', 'Var206', 'Var207', 'Var210', 'Var211', 'Var218', 'Var219', 'Var225', 'Var226', 'Var192']
----------
Var193 diff_metric: -0.000162826203141897
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14', 'Var23', 'Var35', 'Var43', 'Var44', 'Var47', 'Var65', 'Var75', 'Var77', 'Var89', 'Var107', 'Var132', 'Var137', 'Var146', 'Var158', 'Var164', 'Var170', 'Var179', 'Var205', 'Var206', 'Var207', 'Var210', 'Var211', 'Var218', 'Var219', 'Var225', 'Var226', 'Var192', 'Var193']
----------
Var197 diff_metric: 0.000618840393117992
----------
----------
Var198 diff_metric: 0.0002256377972952306
----------
----------
Var199 diff_metric: -0.011189739307687185
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14', 'Var23', 'Var35', 'Var43', 'Var44', 'Var47', 'Var65', 'Var75', 'Var77', 'Var89', 'Var107', 'Var132', 'Var137', 'Var146', 'Var158', 'Var164', 'Var170', 'Var179', 'Var205', 'Var206', 'Var207', 'Var210', 'Var211', 'Var218', 'Var219', 'Var225', 'Var226', 'Var192', 'Var193', 'Var199']
----------
Var200 diff_metric: -0.012070412301151578
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14', 'Var23', 'Var35', 'Var43', 'Var44', 'Var47', 'Var65', 'Var75', 'Var77', 'Var89', 'Var107', 'Var132', 'Var137', 'Var146', 'Var158', 'Var164', 'Var170', 'Var179', 'Var205', 'Var206', 'Var207', 'Var210', 'Var211', 'Var218', 'Var219', 'Var225', 'Var226', 'Var192', 'Var193', 'Var199', 'Var200']
----------
Var202 diff_metric: -0.00019720622498176077
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14', 'Var23', 'Var35', 'Var43', 'Var44', 'Var47', 'Var65', 'Var75', 'Var77', 'Var89', 'Var107', 'Var132', 'Var137', 'Var146', 'Var158', 'Var164', 'Var170', 'Var179', 'Var205', 'Var206', 'Var207', 'Var210', 'Var211', 'Var218', 'Var219', 'Var225', 'Var226', 'Var192', 'Var193', 'Var199', 'Var200', 'Var202']
----------
Var204 diff_metric: 0.00019186270252591342
----------
----------
Var212 diff_metric: -0.0015179636622324466
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14', 'Var23', 'Var35', 'Var43', 'Var44', 'Var47', 'Var65', 'Var75', 'Var77', 'Var89', 'Var107', 'Var132', 'Var137', 'Var146', 'Var158', 'Var164', 'Var170', 'Var179', 'Var205', 'Var206', 'Var207', 'Var210', 'Var211', 'Var218', 'Var219', 'Var225', 'Var226', 'Var192', 'Var193', 'Var199', 'Var200', 'Var202', 'Var212']
----------
Var214 diff_metric: -0.007788536853820638
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14', 'Var23', 'Var35', 'Var43', 'Var44', 'Var47', 'Var65', 'Var75', 'Var77', 'Var89', 'Var107', 'Var132', 'Var137', 'Var146', 'Var158', 'Var164', 'Var170', 'Var179', 'Var205', 'Var206', 'Var207', 'Var210', 'Var211', 'Var218', 'Var219', 'Var225', 'Var226', 'Var192', 'Var193', 'Var199', 'Var200', 'Var202', 'Var212', 'Var214']
----------
Var216 diff_metric: -0.002144869749623579
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14', 'Var23', 'Var35', 'Var43', 'Var44', 'Var47', 'Var65', 'Var75', 'Var77', 'Var89', 'Var107', 'Var132', 'Var137', 'Var146', 'Var158', 'Var164', 'Var170', 'Var179', 'Var205', 'Var206', 'Var207', 'Var210', 'Var211', 'Var218', 'Var219', 'Var225', 'Var226', 'Var192', 'Var193', 'Var199', 'Var200', 'Var202', 'Var212', 'Var214', 'Var216']
----------
Var217 diff_metric: -0.009745576748055096
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14', 'Var23', 'Var35', 'Var43', 'Var44', 'Var47', 'Var65', 'Var75', 'Var77', 'Var89', 'Var107', 'Var132', 'Var137', 'Var146', 'Var158', 'Var164', 'Var170', 'Var179', 'Var205', 'Var206', 'Var207', 'Var210', 'Var211', 'Var218', 'Var219', 'Var225', 'Var226', 'Var192', 'Var193', 'Var199', 'Var200', 'Var202', 'Var212', 'Var214', 'Var216', 'Var217']
----------
Var220 diff_metric: 3.730383601407983e-05
----------
----------
Var222 diff_metric: -0.00012340512238118162
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14', 'Var23', 'Var35', 'Var43', 'Var44', 'Var47', 'Var65', 'Var75', 'Var77', 'Var89', 'Var107', 'Var132', 'Var137', 'Var146', 'Var158', 'Var164', 'Var170', 'Var179', 'Var205', 'Var206', 'Var207', 'Var210', 'Var211', 'Var218', 'Var219', 'Var225', 'Var226', 'Var192', 'Var193', 'Var199', 'Var200', 'Var202', 'Var212', 'Var214', 'Var216', 'Var217', 'Var222']
----------
Var228 diff_metric: -0.0007713828360614849
----------
----------
good_cat_columns: ['Var1', 'Var7', 'Var12', 'Var14', 'Var23', 'Var35', 'Var43', 'Var44', 'Var47', 'Var65', 'Var75', 'Var77', 'Var89', 'Var107', 'Var132', 'Var137', 'Var146', 'Var158', 'Var164', 'Var170', 'Var179', 'Var205', 'Var206', 'Var207', 'Var210', 'Var211', 'Var218', 'Var219', 'Var225', 'Var226', 'Var192', 'Var193', 'Var199', 'Var200', 'Var202', 'Var212', 'Var214', 'Var216', 'Var217', 'Var222', 'Var228']
----------
Var3,Var5_mix diff_metric: 0.0
----------
----------
Var3,Var6_mix diff_metric: 0.0
----------
----------
Var3,Var9_mix diff_metric: 0.0
----------
----------
Var3,Var10_mix diff_metric: 0.0
----------
----------
Var3,Var13_mix diff_metric: 0.0
----------
----------
Var3,Var16_mix diff_metric: 0.0
----------
----------
Var3,Var17_mix diff_metric: 0.0
----------
----------
Var3,Var18_mix diff_metric: 0.0
----------
----------
Var3,Var21_mix diff_metric: 0.0
----------
----------
Var3,Var22_mix diff_metric: 0.0
----------
----------
Var3,Var24_mix diff_metric: 0.0
----------
----------
Var3,Var25_mix diff_metric: 0.0
----------
----------
Var3,Var28_mix diff_metric: 0.0
----------
----------
Var3,Var33_mix diff_metric: 0.0
----------
----------
Var3,Var36_mix diff_metric: 0.0
----------
----------
Var3,Var37_mix diff_metric: 0.0
----------
----------
Var3,Var38_mix diff_metric: 0.0
----------
----------
Var3,Var40_mix diff_metric: 0.0
----------
----------
Var3,Var41_mix diff_metric: 0.0
----------
----------
Var3,Var46_mix diff_metric: 0.0
----------
----------
Var3,Var50_mix diff_metric: 0.0
----------
----------
Var3,Var51_mix diff_metric: 0.0
----------
----------
Var3,Var53_mix diff_metric: 0.0
----------
----------
Var3,Var56_mix diff_metric: 0.0
----------
----------
Var3,Var57_mix diff_metric: 0.0
----------
----------
Var3,Var58_mix diff_metric: 0.0
----------
----------
Var3,Var59_mix diff_metric: 0.0
----------
----------
Var3,Var60_mix diff_metric: 0.0
----------
----------
Var3,Var61_mix diff_metric: 0.0
----------
----------
Var3,Var63_mix diff_metric: 0.0
----------
----------
Var3,Var66_mix diff_metric: 0.0
----------
----------
Var3,Var68_mix diff_metric: -9.84014701339575e-05
----------
----------
good_num_columns: ['Var22', 'Var38', 'Var57', 'Var73', 'Var74', 'Var83', 'Var85', 'Var113', 'Var114', 'Var126', 'Var140', 'Var149', 'Var153', 'Var189', 'Var3,Var68_mix']
----------
Var3,Var69_mix diff_metric: 0.0
----------
----------
Var3,Var70_mix diff_metric: 0.0
----------
----------
Var3,Var71_mix diff_metric: 0.0
----------
----------
Var3,Var73_mix diff_metric: 0.0
----------
----------
Var3,Var74_mix diff_metric: 0.0
----------
----------
Var3,Var76_mix diff_metric: 0.0
----------
----------
Var3,Var80_mix diff_metric: 0.0
----------
----------
Var3,Var81_mix diff_metric: 0.0
----------
----------
Var3,Var83_mix diff_metric: 0.0
----------
----------
Var3,Var84_mix diff_metric: 0.0
----------
----------
Var3,Var85_mix diff_metric: 0.0
----------
----------
Var3,Var86_mix diff_metric: 0.0
----------
----------
Var3,Var88_mix diff_metric: 0.0
----------
----------
Var3,Var91_mix diff_metric: 0.0
----------
----------
Var3,Var94_mix diff_metric: 0.0
----------
----------
Var3,Var95_mix diff_metric: 0.0
----------
----------
Var3,Var96_mix diff_metric: 0.0
----------
----------
Var3,Var98_mix diff_metric: 0.0
----------
----------
Var3,Var99_mix diff_metric: 0.0
----------
----------
Var3,Var101_mix diff_metric: 0.0
----------
----------
Var3,Var103_mix diff_metric: 0.0
----------
----------
Var3,Var104_mix diff_metric: 0.0
----------
----------
Var3,Var105_mix diff_metric: 0.0
----------
----------
Var3,Var106_mix diff_metric: 0.0
----------
----------
Var3,Var108_mix diff_metric: 0.0
----------
----------
Var3,Var109_mix diff_metric: 0.0
----------
----------
Var3,Var111_mix diff_metric: 0.0
----------
----------
Var3,Var112_mix diff_metric: 0.0
----------
----------
Var3,Var113_mix diff_metric: 0.0
----------
----------
Var3,Var114_mix diff_metric: -4.436131850305003e-06
----------
----------
good_num_columns: ['Var22', 'Var38', 'Var57', 'Var73', 'Var74', 'Var83', 'Var85', 'Var113', 'Var114', 'Var126', 'Var140', 'Var149', 'Var153', 'Var189', 'Var3,Var68_mix', 'Var3,Var114_mix']
----------
Var3,Var115_mix diff_metric: 0.0
----------
----------
Var3,Var117_mix diff_metric: 0.0
----------
----------
Var3,Var119_mix diff_metric: 0.0
----------
----------
Var3,Var120_mix diff_metric: 0.0
----------
----------
Var3,Var121_mix diff_metric: 0.0
----------
----------
Var3,Var123_mix diff_metric: 0.0
----------
----------
Var3,Var124_mix diff_metric: 0.0
----------
----------
Var3,Var125_mix diff_metric: 0.0
----------
----------
Var3,Var126_mix diff_metric: 0.0004877728611770715
----------
----------
Var3,Var127_mix diff_metric: 0.0
----------
----------
Var3,Var128_mix diff_metric: 0.0
----------
----------
Var3,Var129_mix diff_metric: 0.0
----------
----------
Var3,Var131_mix diff_metric: 0.0
----------
----------
Var3,Var133_mix diff_metric: 0.0
----------
----------
Var3,Var134_mix diff_metric: 0.0
----------
----------
Var3,Var135_mix diff_metric: 0.0
----------
----------
Var3,Var136_mix diff_metric: 0.0
----------
----------
Var3,Var139_mix diff_metric: 0.0
----------
----------
Var3,Var140_mix diff_metric: 0.0
----------
----------
Var3,Var145_mix diff_metric: 0.0
----------
----------
Var3,Var148_mix diff_metric: 0.0
----------
----------
Var3,Var149_mix diff_metric: 0.0
----------
----------
Var3,Var150_mix diff_metric: 0.0
----------
----------
Var3,Var153_mix diff_metric: 0.0
----------
----------
Var3,Var154_mix diff_metric: 0.0
----------
----------
Var3,Var156_mix diff_metric: 0.0
----------
----------
Var3,Var157_mix diff_metric: 0.0
----------
----------
Var3,Var160_mix diff_metric: 0.0
----------
----------
Var3,Var162_mix diff_metric: 0.0
----------
----------
Var3,Var163_mix diff_metric: 0.0
----------
----------
Var3,Var165_mix diff_metric: 0.0
----------
----------
Var3,Var166_mix diff_metric: 0.0
----------
----------
Var3,Var168_mix diff_metric: 0.0
----------
----------
Var3,Var171_mix diff_metric: 0.0
----------
----------
Var3,Var174_mix diff_metric: 0.0
----------
----------
Var3,Var176_mix diff_metric: 0.0
----------
----------
Var3,Var177_mix diff_metric: 0.0
----------
----------
Var3,Var178_mix diff_metric: 0.0
----------
----------
Var3,Var180_mix diff_metric: 0.0
----------
----------
Var3,Var182_mix diff_metric: 0.0
----------
----------
Var3,Var183_mix diff_metric: 0.0
----------
----------
Var3,Var184_mix diff_metric: 0.0
----------
----------
Var3,Var187_mix diff_metric: 0.0
----------
----------
Var3,Var188_mix diff_metric: 0.0
----------
----------
Var3,Var189_mix diff_metric: 0.0
----------
----------
Var5,Var6_mix diff_metric: 0.0
----------
----------
Var5,Var9_mix diff_metric: 0.0
----------
----------
Var5,Var10_mix diff_metric: 0.0
----------
----------
Var5,Var13_mix diff_metric: 0.0
----------
----------
Var5,Var16_mix diff_metric: 0.0
----------
----------
Var5,Var17_mix diff_metric: 0.0
----------
----------
Var5,Var18_mix diff_metric: 0.0
----------
----------
Var5,Var21_mix diff_metric: 0.0
----------
----------
Var5,Var22_mix diff_metric: 0.0
----------
----------
Var5,Var24_mix diff_metric: 0.0
----------
----------
Var5,Var25_mix diff_metric: 0.0
----------
----------
Var5,Var28_mix diff_metric: 0.0
----------
----------
Var5,Var33_mix diff_metric: 0.0
----------
----------
Var5,Var36_mix diff_metric: 0.0
----------
----------
Var5,Var37_mix diff_metric: 0.0
----------
----------
Var5,Var38_mix diff_metric: 0.0
----------
----------
Var5,Var40_mix diff_metric: 0.0
----------
----------
Var5,Var41_mix diff_metric: 0.0
----------
----------
Var5,Var46_mix diff_metric: 0.0
----------
----------
Var5,Var50_mix diff_metric: 0.0
----------
----------
Var5,Var51_mix diff_metric: 0.0
----------
----------
Var5,Var53_mix diff_metric: 0.0
----------
----------
Var5,Var56_mix diff_metric: 0.0
----------
----------
Var5,Var57_mix diff_metric: 0.0
----------
----------
Var5,Var58_mix diff_metric: 0.0
----------
----------
Var5,Var59_mix diff_metric: 0.0
----------
----------
Var5,Var60_mix diff_metric: 0.0
----------
----------
Var5,Var61_mix diff_metric: 0.0
----------
----------
Var5,Var63_mix diff_metric: 0.0
----------
----------
Var5,Var66_mix diff_metric: 0.0
----------
----------
Var5,Var68_mix diff_metric: 0.0
----------
----------
Var5,Var69_mix diff_metric: 0.0
----------
----------
Var5,Var70_mix diff_metric: 0.0
----------
----------
Var5,Var71_mix diff_metric: 0.0
----------
----------
Var5,Var73_mix diff_metric: 0.0
----------
----------
Var5,Var74_mix diff_metric: 0.0
----------
----------
Var5,Var76_mix diff_metric: 0.0
----------
----------
Var5,Var80_mix diff_metric: -7.561588381221895e-06
----------
----------
good_num_columns: ['Var22', 'Var38', 'Var57', 'Var73', 'Var74', 'Var83', 'Var85', 'Var113', 'Var114', 'Var126', 'Var140', 'Var149', 'Var153', 'Var189', 'Var3,Var68_mix', 'Var3,Var114_mix', 'Var5,Var80_mix']
----------
Var5,Var81_mix diff_metric: 0.0
----------
----------
Var5,Var83_mix diff_metric: 0.0
----------
----------
Var5,Var84_mix diff_metric: 0.0
----------
----------
Var5,Var85_mix diff_metric: 0.0
----------
----------
Var5,Var86_mix diff_metric: 0.0
----------

In [39]:
# Build the extra numeric features for train and test with one shared set of
# keyword arguments, so the two calls cannot drift apart.
# NOTE(review): judging by its name, normed_fe_interaction also normalises the
# features; it is called separately on train and test here - confirm that the
# test set is scaled consistently with the train set.
_fe_kwargs = dict(level=2, max_feats=150, num_columns=num_columns)
data_plus_new_num = normed_fe_interaction(features[good_columns], **_fe_kwargs)
data_plus_new_num_test = normed_fe_interaction(test_data[good_columns], **_fe_kwargs)

# Pass both frames through cat_prep for the categorical columns
# (presumably type/NaN preparation for CatBoost - verify against cat_prep).
data = cat_prep(data_plus_new_num, cat_columns)
data_test = cat_prep(data_plus_new_num_test, cat_columns)
In [41]:
# Final CatBoost model: hyper-parameters are collected in one place and then
# unpacked into the constructor.
_cb_params = dict(
    task_type='GPU',
    iterations=380,
    random_state=0,
    eval_metric='AUC',
    learning_rate=0.08,
    boosting_type='Ordered',
    bootstrap_type='Bernoulli',
    subsample=0.8,
    one_hot_max_size=10,
    leaf_estimation_iterations=10,
    max_ctr_complexity=4,
)
estimator_cb = CatBoostClassifier(**_cb_params)

# Train on the selected feature subset; progress is logged every 10 iterations.
estimator_cb.fit(
    data[good_columns2],
    labels,
    cat_features=cat_cols_selected,
    verbose=10,
    plot=True,
)

# Column 1 of predict_proba is written out as the submission score
# (the probability of the second class, presumably "churn").
probs = estimator_cb.predict_proba(data_test[good_columns2])
write_to_submission_file(probs[:, 1], out_file='submission_cb_new_fe_final3_selected_fe5.csv')
0:	learn: 0.5519128	total: 157ms	remaining: 59.6s
10:	learn: 0.5556480	total: 1.34s	remaining: 44.9s
20:	learn: 0.5988294	total: 2.67s	remaining: 45.6s
30:	learn: 0.6510229	total: 3.96s	remaining: 44.6s
40:	learn: 0.6980442	total: 5.36s	remaining: 44.3s
50:	learn: 0.7176897	total: 6.83s	remaining: 44s
60:	learn: 0.7245119	total: 8.2s	remaining: 42.9s
70:	learn: 0.7307275	total: 9.62s	remaining: 41.9s
80:	learn: 0.7362364	total: 11s	remaining: 40.6s
90:	learn: 0.7407193	total: 12.5s	remaining: 39.6s
100:	learn: 0.7438927	total: 13.8s	remaining: 38.2s
110:	learn: 0.7466337	total: 15.3s	remaining: 37.1s
120:	learn: 0.7490082	total: 16.7s	remaining: 35.8s
130:	learn: 0.7514439	total: 18.1s	remaining: 34.4s
140:	learn: 0.7534985	total: 19.5s	remaining: 33.1s
150:	learn: 0.7542914	total: 20.9s	remaining: 31.7s
160:	learn: 0.7556021	total: 22.3s	remaining: 30.3s
170:	learn: 0.7567201	total: 23.7s	remaining: 28.9s
180:	learn: 0.7580822	total: 25s	remaining: 27.5s
190:	learn: 0.7593804	total: 26.4s	remaining: 26.1s
200:	learn: 0.7597601	total: 27.7s	remaining: 24.7s
210:	learn: 0.7610842	total: 29.1s	remaining: 23.3s
220:	learn: 0.7620250	total: 30.5s	remaining: 22s
230:	learn: 0.7630050	total: 31.9s	remaining: 20.6s
240:	learn: 0.7631229	total: 33.3s	remaining: 19.2s
250:	learn: 0.7633936	total: 34.6s	remaining: 17.8s
260:	learn: 0.7641762	total: 35.9s	remaining: 16.4s
270:	learn: 0.7647633	total: 37.2s	remaining: 15s
280:	learn: 0.7652184	total: 38.6s	remaining: 13.6s
290:	learn: 0.7660123	total: 40s	remaining: 12.2s
300:	learn: 0.7664313	total: 41.3s	remaining: 10.8s
310:	learn: 0.7670855	total: 42.6s	remaining: 9.45s
320:	learn: 0.7677852	total: 43.9s	remaining: 8.07s
330:	learn: 0.7687119	total: 45.2s	remaining: 6.69s
340:	learn: 0.7694173	total: 46.5s	remaining: 5.32s
350:	learn: 0.7697378	total: 47.8s	remaining: 3.94s
360:	learn: 0.7699814	total: 49.1s	remaining: 2.58s
370:	learn: 0.7703539	total: 50.3s	remaining: 1.22s
379:	learn: 0.7706770	total: 51.4s	remaining: 0us

image.png

In [42]:
# Blend earlier submissions: read each CSV, put them side by side and write
# the row-wise mean as a new submission.
sub_names = [
    'submission_cb_new_fe1',
    'submission_mean3',
    'submission_mean4',
    'submission_mean5',
    'submission_cb_new_fe6',
    'submission_mean6',
    'submission_mean7',
    'submission_mean8',
    'submission_mean10',
    'submission_mean11',
    'submission_cb_new_fe_final2_selected_fe2',
    'submission_cb_new_fe_final3_selected_fe3',
    'submission_mean12',
]
submission_list = [
    pd.read_csv(f'{submission_name}.csv', index_col=0)
    for submission_name in sub_names
]
sub_df = pd.concat(submission_list, axis=1)
sub_df['mean'] = sub_df.mean(axis=1)
write_to_submission_file(sub_df[['mean']].values, out_file='submission_mean13.csv')

image.png

In [43]:
# Wider blend: the same row-wise averaging over a longer list of submissions.
# Several inputs are themselves averages, so the underlying models end up
# with unequal implicit weights.
sub_names = [
    'submission_cb_new_fe1',
    'submission_mean3',
    'submission_mean4',
    'submission_mean5',
    'submission_cb_new_fe6',
    'submission_mean6',
    'submission_mean7',
    'submission_mean8',
    'submission_mean10',
    'submission_mean11',
    'submission_cb_new_fe_final2_selected_fe2',
    'submission_cb_new_fe_final3_selected_fe3',
    'submission_mean12',
    'submission_cb_new_fe_final3_selected_fe5',
    'submission_cb_new_fe9',
    'submission_cb_new_fe5',
    'submission_cb_new_fe4',
    'submission_cb_new_fe3',
    'submission_mean2',
    'submission_cb_new_fe2',
    'submission_mean1',
    'submission_cb6',
    'submission_cb5',
    'submission_cb3',
    'submission_mean13',
]
submission_list = [
    pd.read_csv(f'{submission_name}.csv', index_col=0)
    for submission_name in sub_names
]
sub_df = pd.concat(submission_list, axis=1)
sub_df['mean'] = sub_df.mean(axis=1)
write_to_submission_file(sub_df[['mean']].values, out_file='submission_mean14.csv')

image.png

Итак, после множества экспериментов с моделью, признаками и усреднением победил CatBoost с использованием множества дополнительных признаков взаимодействия (с учётом их отбора) и с финальным усреднением нескольких ответов. Место на данный момент — 4, метрика 0.72909.

Теперь можно придумать простую схему работы с потенциально уходящими клиентами. Сделаем предварительную оценку эффекта.

In [ ]:
# --- Classifier quality at the chosen decision threshold ---
precision = 0.3  # share of flagged clients that are real churners
recall = 0.3  # share of real churners that get flagged

# --- Company parameters ---
n_clients = 1e6  # size of the client base
av_revenue_per_client = 500  # average revenue per client (rub./month)
av_churn_prop = 0.05  # average share of clients that churn

# --- Retention offer parameters ---
discount = 0.05  # discount offered to every flagged client
return_rate = 0.4  # share of flagged churners who stay after the offer
In [ ]:
# Economic model: estimate the revenue effect of acting on model predictions.

# Expected number of clients that will churn.
n_churners = n_clients*av_churn_prop

# Approximate number of clients flagged by the model:
# true positives (churners * recall) divided by precision.
n_activations = n_churners*recall/precision

# Baseline revenue: clients who stay without any intervention.
revenue_from_loyal_clients = n_clients*(1-av_churn_prop)*av_revenue_per_client

# Revenue from detected churners who accept the offer; they pay the
# discounted price.
revenue_from_returned_clients = (
    n_churners*av_revenue_per_client*(1-discount)*recall*return_rate
)

# Revenue lost on false positives: loyal clients who got a discount anyway.
discount_inacc_loss = n_activations*(1-precision)*av_revenue_per_client*discount

# Revenue when the model is used.
total_revenue = revenue_from_loyal_clients + revenue_from_returned_clients - discount_inacc_loss

# Economic effect of the model, absolute and relative to the baseline.
model_gain = total_revenue - revenue_from_loyal_clients
model_gain_percent = model_gain/revenue_from_loyal_clients * 100

print(f'Выгода от использования модели: {model_gain/1e6} млн руб./месяц')
print(f'Относительная выгода: {round(model_gain_percent,3)} %')
Выгода от использвания модели: 1.975 млн руб./месяц
Относительная выгода: 0.416 %

Видно, что при использовании модели имеется экономический эффект. Зададим функцию для расчета эффекта и посмотрим, как он будет меняться при изменении параметров модели и предложения. Будем считать, что доля уходящих клиентов известна из средней доли за предыдущие периоды.

In [ ]:
def usage_efficiency_est(precision,recall,n_clients,av_revenue_per_client,av_churn_prop,discount,return_rate):
  """Estimate the economic effect of a churn-retention campaign.

  Every client flagged by the model is offered a discount. Detected churners
  stay with probability ``return_rate`` and then pay the discounted price;
  flagged non-churners simply pay less.

  Args:
    precision: share of flagged clients that are real churners (non-zero).
    recall: share of real churners that get flagged.
    n_clients: size of the client base.
    av_revenue_per_client: average revenue per client.
    av_churn_prop: share of clients that churn.
    discount: discount offered to flagged clients, as a fraction of revenue.
    return_rate: share of flagged churners who stay after the offer.

  Returns:
    Tuple ``(model_gain, model_gain_percent)``: the absolute revenue change
    versus doing nothing, and the same change as a percentage of the revenue
    from clients who stay on their own.
  """
  churners = n_clients*av_churn_prop

  # Flagged clients = true positives / precision.
  flagged = churners*recall/precision

  # Baseline: revenue from clients who do not churn.
  baseline_revenue = n_clients*(1-av_churn_prop)*av_revenue_per_client

  # Detected churners who accept the offer pay the discounted price.
  retained_revenue = churners*av_revenue_per_client*(1-discount)*recall*return_rate

  # False positives receive a discount they did not need.
  wasted_discount = flagged*(1-precision)*av_revenue_per_client*discount

  revenue_with_model = baseline_revenue + retained_revenue - wasted_discount

  gain = revenue_with_model - baseline_revenue
  gain_percent = gain/baseline_revenue * 100

  return gain,gain_percent

Оценим долю возвращающихся клиентов при использовании скидки как min(1, 5*discount). Чтобы обосновать данную зависимость, можно провести опросы среди клиентов. При дальнейшем использовании данная оценка может быть уточнена.

In [ ]:
# Sweep the discount size at fixed model quality (precision = recall = 0.3)
# and a 6% churn share; the return rate is modelled as min(1, 5*discount).
discounts = np.linspace(0.05, 0.3, 50)
effects = [
    usage_efficiency_est(0.3, 0.3, 1e6, 500, 0.06, d, min(1, 5*d))[1]
    for d in discounts
]

# Relative gain (in percent) as a function of the discount.
plt.plot(discounts, effects)
plt.xlabel('discount proportion')
plt.ylabel('percent of effect');
In [ ]:
# Decision thresholds and the (precision, recall) pair assumed at each one.
trsh = [0.05, 0.1, 0.15, 0.35, 0.5]
precisions = [0.1, 0.2, 0.3, 0.4, 0.6]
recalls = [0.6, 0.5, 0.3, 0.1, 0.01]
model_params = list(zip(precisions, recalls))

# Relative gain for each operating point at a 15% discount, a 75% return
# rate and a 6% churn share.
effects = [
    usage_efficiency_est(p, r, 1e6, 500, 0.06, 0.15, 0.75)[1]
    for p, r in model_params
]

plt.plot(trsh, effects)
plt.xlabel('threshold')
plt.ylabel('percent of effect');

Итак, для лучшей модели и при доле потенциально уходящих клиентов 6% в первом приближении оптимальными получились порог отнесения к оттоку 0.15 (пороговая вероятность оттока 15%) и скидка 15–20%. При таких параметрах относительная выгода от использования модели превышает 0.5%, что при доходе 500 млн руб. составит более 2,5 млн руб. в месяц. В качестве улучшения можно давать скидку пропорционально вероятности оттока: если средняя вероятность оттока клиентов окажется меньше той, которая соответствует скидке 15–20%, получится экономия при той же доле возвращающихся клиентов.